/*
 * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"
#include "neon.S"

/*
 * qpel_lowpass r0, r1, rc1, rc2, shift
 *
 * 6-tap low-pass filter producing 8 output pixels.
 *
 *   r0, r1    D registers that together hold 16 consecutive source bytes
 *             (r0 = bytes 0-7, r1 = bytes 8-15); byte 0 is src[-2]
 *   rc1, rc2  D registers holding the byte weights applied to src[0]
 *             and src[1]
 *   shift     immediate right shift applied to the 16-bit sum
 *
 * For every output pixel:
 *   sum = src[-2] + src[3] - 5 * (src[-1] + src[2])
 *         + rc1 * src[0] + rc2 * src[1]
 *   out = sum shifted right by <shift> with rounding, saturated to u8
 *
 * The result replaces r0.  Clobbers q8, q9, q12, q13 and d28.
 */
.macro  qpel_lowpass    r0,  r1,  rc1, rc2, shift
        vext.8          d25, \r0, \r1, #1       @ src[-1]
        vext.8          d26, \r0, \r1, #4       @ src[ 2]
        vext.8          d24, \r0, \r1, #5       @ src[ 3]
        vaddl.u8        q9,  d25, d26
        vaddl.u8        q8,  \r0, d24
        vext.8          d27, \r0, \r1, #2       @ src[ 0]
        vshl.s16        q12, q9,  #2
        vsub.s16        q8,  q8,  q9
        vext.8          d28, \r0, \r1, #3       @ src[ 1]
        vsub.s16        q8,  q8,  q12
        vmlal.u8        q8,  d27, \rc1
        vmlal.u8        q8,  d28, \rc2
        vqrshrun.s16    \r0, q8,  #\shift
.endm

/*
 * qpel_lowpass_x2 r0, r1, r2, r3, rc1, rc2, shift
 *
 * Same filter as qpel_lowpass, applied to two independent rows with the
 * instructions of both rows interleaved.
 *
 *   r0, r1    first row  (bytes 0-7, bytes 8-15)
 *   r2, r3    second row (bytes 0-7, bytes 8-15)
 *   rc1, rc2  weights for src[0] and src[1]
 *   shift     immediate right shift
 *
 * Results replace r0 (first row) and r2 (second row).  r1 is used as
 * scratch and is overwritten; r3 is only read.
 * Clobbers q8, q9, q10, d22, q12, q13, d28 and d29.
 */
.macro  qpel_lowpass_x2 r0,  r1,  r2,  r3,  rc1, rc2, shift
        vext.8          d25, \r0, \r1, #1       @ src[-1]
        vext.8          d26, \r0, \r1, #4       @ src[ 2]
        vext.8          d24, \r0, \r1, #5       @ src[ 3]
        vaddl.u8        q9,  d25, d26
        vaddl.u8        q8,  \r0, d24
        vext.8          d29, \r0, \r1, #2       @ src[ 0]
        vext.8          d28, \r0, \r1, #3       @ src[ 1]
        vshl.s16        q10, q9,  #2
        vext.8          \r1, \r2, \r3, #1       @ src[-1]
        vsub.s16        q8,  q8,  q9
        vext.8          d22, \r2, \r3, #4       @ src[ 2]
        vext.8          \r0, \r2, \r3, #5       @ src[ 3]
        vaddl.u8        q13, \r1, d22
        vaddl.u8        q12, \r2, \r0
        vsub.s16        q8,  q8,  q10
        vshl.s16        q9,  q13, #2
        vsub.s16        q12, q12, q13
        vmlal.u8        q8,  d29, \rc1
        vmlal.u8        q8,  d28, \rc2
        vsub.s16        q12, q12, q9
        vext.8          d26, \r2, \r3, #2       @ src[ 0]
        vext.8          d27, \r2, \r3, #3       @ src[ 1]
        vmlal.u8        q12, d26, \rc1
        vmlal.u8        q12, d27, \rc2
        vqrshrun.s16    \r0, q8,  #\shift
        vqrshrun.s16    \r2, q12, #\shift
.endm

/*
 * rv40_qpel8_h shift
 *
 * Emits put_rv40_qpel8_h_lp_packed_s<shift>_neon: horizontal low-pass of
 * 8-pixel-wide rows into a packed buffer (8 bytes per row, no padding).
 *
 *   r1      source pointer, advanced by r2 after every row in the loop
 *   r2      source stride
 *   r3      loop counter; two rows are filtered per pass (r3 -= 2) and
 *           one more row is filtered after the loop.  The callers in
 *           this file pass 12 or 20, giving 13 or 21 output rows.
 *   r12     destination, accessed with :64 alignment, post-incremented
 *           by 8 for every row stored
 *   d0, d1  filter weights (rc1, rc2 of the lowpass macros)
 */
.macro  rv40_qpel8_h    shift
function put_rv40_qpel8_h_lp_packed_s\shift\()_neon
1:
        vld1.8          {q2},     [r1], r2
        vld1.8          {q3},     [r1], r2
        qpel_lowpass_x2 d4,  d5,  d6,  d7,  d0,  d1,  \shift
        vst1.8          {d4},     [r12,:64]!
        vst1.8          {d6},     [r12,:64]!
        subs            r3,  r3,  #2
        bgt             1b
        vld1.8          {q2},     [r1]
        qpel_lowpass    d4,  d5,  d0,  d1,  \shift
        vst1.8          {d4},     [r12,:64]!
        bx              lr
endfunc
.endm

/*
 * rv40_qpel8_v shift, type
 *
 * Emits <type>_rv40_qpel8_v_lp_packed_s<shift>_neon: vertical low-pass of
 * an 8x8 block whose 13 input rows are stored packed (8 bytes per row).
 *
 *   r0      destination, accessed with :64 alignment; advanced by 8 * r2
 *           on return
 *   r1      packed source, accessed with :64 alignment; advanced by
 *           13 * 8 bytes on return
 *   r2      destination stride
 *   d0, d1  filter weights
 *
 * The rows are transposed with transpose_8x8 (from neon.S) so that the
 * row-wise lowpass macros can be reused, then transposed back.  Only
 * d2-d14 are loaded; d15, d30 and d31 take part in the second transpose
 * with whatever they held before.
 * When type is avg the result is combined with the existing destination
 * using a rounding halving add before being stored.
 * Uses d2-d15 and q8-q15; callers are responsible for preserving d8-d15.
 */
.macro  rv40_qpel8_v    shift, type
function \type\()_rv40_qpel8_v_lp_packed_s\shift\()_neon
        vld1.64         {d2},     [r1,:64]!
        vld1.64         {d3},     [r1,:64]!
        vld1.64         {d4},     [r1,:64]!
        vld1.64         {d5},     [r1,:64]!
        vld1.64         {d6},     [r1,:64]!
        vld1.64         {d7},     [r1,:64]!
        vld1.64         {d8},     [r1,:64]!
        vld1.64         {d9},     [r1,:64]!
        vld1.64         {d10},    [r1,:64]!
        vld1.64         {d11},    [r1,:64]!
        vld1.64         {d12},    [r1,:64]!
        vld1.64         {d13},    [r1,:64]!
        vld1.64         {d14},    [r1,:64]!
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
        transpose_8x8   d10, d11, d12, d13, d14, d15, d30, d31
        qpel_lowpass_x2 d2,  d10, d3,  d11, d0,  d1,  \shift
        qpel_lowpass_x2 d4,  d12, d5,  d13, d0,  d1,  \shift
        qpel_lowpass_x2 d6,  d14, d7,  d15, d0,  d1,  \shift
        qpel_lowpass_x2 d8,  d30, d9,  d31, d0,  d1,  \shift
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
  .ifc \type,avg
        vld1.64         d12,      [r0,:64], r2
        vld1.64         d13,      [r0,:64], r2
        vld1.64         d14,      [r0,:64], r2
        vld1.64         d15,      [r0,:64], r2
        vld1.64         d16,      [r0,:64], r2
        vld1.64         d17,      [r0,:64], r2
        vld1.64         d18,      [r0,:64], r2
        vld1.64         d19,      [r0,:64], r2
        sub             r0,  r0,  r2,  lsl #3   @ rewind dst by the 8 rows just read
        vrhadd.u8       q1,  q1,  q6
        vrhadd.u8       q2,  q2,  q7
        vrhadd.u8       q3,  q3,  q8
        vrhadd.u8       q4,  q4,  q9
  .endif
        vst1.64         d2,       [r0,:64], r2
        vst1.64         d3,       [r0,:64], r2
        vst1.64         d4,       [r0,:64], r2
        vst1.64         d5,       [r0,:64], r2
        vst1.64         d6,       [r0,:64], r2
        vst1.64         d7,       [r0,:64], r2
        vst1.64         d8,       [r0,:64], r2
        vst1.64         d9,       [r0,:64], r2
        bx              lr
endfunc
.endm

        rv40_qpel8_h    5
        rv40_qpel8_h    6

/*
 * rv40_qpel type
 *
 * Emits the <type> (put or avg) family of 8x8 and 16x16 entry points
 * ff_<type>_rv40_qpel{8,16}_mcXY_neon together with their local helpers.
 * All entry points take r0 = dst, r1 = src, r2 = stride.
 *
 * As set up by the entry points below, X selects the horizontal filter
 * and Y the vertical filter:
 *   0  no filtering in that direction
 *   1  weights (d0, d1) = (52, 20), shift 6
 *   2  weights (d0, d1) = (20, 20), shift 5
 *   3  weights (d0, d1) = (20, 52), shift 6
 * mc33 is not filtered here; it branches to the pixels{8,16}_xy2 routine.
 */
.macro  rv40_qpel       type
/*
 * Horizontal low-pass (shift 6) straight into dst.
 *   r0 = dst (:64 aligned), r1 = src, r2 = stride used for both src and
 *   dst, r3 = row count (two rows per pass), d0/d1 = weights.
 * For avg, r12 walks the destination rows to fetch the pixels that the
 * result is averaged with.
 */
function \type\()_rv40_qpel8_h_lowpass_neon
  .ifc \type,avg
        mov             r12, r0
  .endif
1:
        vld1.8          {q2},     [r1], r2
        vld1.8          {q3},     [r1], r2
        qpel_lowpass_x2 d4,  d5,  d6,  d7,  d0,  d1,  6
  .ifc \type,avg
        vld1.8          {d3},     [r12,:64], r2
        vld1.8          {d16},    [r12,:64], r2
        vrhadd.u8       d4,  d4,  d3
        vrhadd.u8       d6,  d6,  d16
  .endif
        vst1.8          {d4},     [r0,:64], r2
        vst1.8          {d6},     [r0,:64], r2
        subs            r3,  r3,  #2
        bgt             1b
        bx              lr
endfunc

/*
 * Vertical low-pass (shift 6) of an 8x8 block read directly from src.
 *   r0 = dst (:64 aligned), r1 = first of 13 source rows, r2 = stride,
 *   d0/d1 = weights.
 * On return r0 has advanced by 8 rows and r1 by 12 rows (the last row is
 * loaded without post-increment).
 * Uses d2-d15 and q8-q15; callers preserve d8-d15.
 */
function \type\()_rv40_qpel8_v_lowpass_neon
        vld1.64         {d2},     [r1], r2
        vld1.64         {d3},     [r1], r2
        vld1.64         {d4},     [r1], r2
        vld1.64         {d5},     [r1], r2
        vld1.64         {d6},     [r1], r2
        vld1.64         {d7},     [r1], r2
        vld1.64         {d8},     [r1], r2
        vld1.64         {d9},     [r1], r2
        vld1.64         {d10},    [r1], r2
        vld1.64         {d11},    [r1], r2
        vld1.64         {d12},    [r1], r2
        vld1.64         {d13},    [r1], r2
        vld1.64         {d14},    [r1]
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
        transpose_8x8   d10, d11, d12, d13, d14, d15, d30, d31
        qpel_lowpass_x2 d2,  d10, d3,  d11, d0,  d1,  6
        qpel_lowpass_x2 d4,  d12, d5,  d13, d0,  d1,  6
        qpel_lowpass_x2 d6,  d14, d7,  d15, d0,  d1,  6
        qpel_lowpass_x2 d8,  d30, d9,  d31, d0,  d1,  6
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
  .ifc \type,avg
        vld1.64         d12,      [r0,:64], r2
        vld1.64         d13,      [r0,:64], r2
        vld1.64         d14,      [r0,:64], r2
        vld1.64         d15,      [r0,:64], r2
        vld1.64         d16,      [r0,:64], r2
        vld1.64         d17,      [r0,:64], r2
        vld1.64         d18,      [r0,:64], r2
        vld1.64         d19,      [r0,:64], r2
        sub             r0,  r0,  r2,  lsl #3   @ rewind dst by the 8 rows just read
        vrhadd.u8       q1,  q1,  q6
        vrhadd.u8       q2,  q2,  q7
        vrhadd.u8       q3,  q3,  q8
        vrhadd.u8       q4,  q4,  q9
  .endif
        vst1.64         d2,       [r0,:64], r2
        vst1.64         d3,       [r0,:64], r2
        vst1.64         d4,       [r0,:64], r2
        vst1.64         d5,       [r0,:64], r2
        vst1.64         d6,       [r0,:64], r2
        vst1.64         d7,       [r0,:64], r2
        vst1.64         d8,       [r0,:64], r2
        vst1.64         d9,       [r0,:64], r2
        bx              lr
endfunc

        rv40_qpel8_v    5, \type
        rv40_qpel8_v    6, \type

/* 8x8, horizontal only: src is moved 2 pixels left to reach src[-2]. */
function ff_\type\()_rv40_qpel8_mc10_neon, export=1
        sub             r1,  r1,  #2
        mov             r3,  #8
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        b               \type\()_rv40_qpel8_h_lowpass_neon
endfunc

/* 8x8, horizontal only, weights mirrored with respect to mc10. */
function ff_\type\()_rv40_qpel8_mc30_neon, export=1
        sub             r1,  r1,  #2
        mov             r3,  #8
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        b               \type\()_rv40_qpel8_h_lowpass_neon
endfunc

/*
 * 8x8, vertical only: src is moved 2 rows up.  d8-d15 are saved around
 * the call because the vertical helper overwrites them.  r4 is pushed
 * next to lr although nothing here modifies it (presumably to keep the
 * stack 8-byte aligned - confirm against the ABI in use).
 */
function ff_\type\()_rv40_qpel8_mc01_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             r1,  r1,  r2,  lsl #1
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/*
 * 8x8 two-dimensional cases (mc11 ... mc23 below) share one shape:
 *   - reserve 14*8 bytes of stack and round the start up to a multiple
 *     of 8 (13 packed rows of 8 bytes plus alignment slack),
 *   - move src 2 rows up and 2 pixels left,
 *   - horizontal pass: r3 = 12 gives 13 packed rows at r12,
 *   - vertical pass from the packed buffer into dst,
 *   - release the buffer and restore d8-d15.
 * They differ only in the weights and shifts chosen for each pass.
 */
function ff_\type\()_rv40_qpel8_mc11_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8: horizontal (20, 20) shift 5, then vertical (52, 20) shift 6. */
function ff_\type\()_rv40_qpel8_mc21_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        vmov.i8         d0,  #52
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8: horizontal (20, 52) shift 6, then vertical (52, 20) shift 6. */
function ff_\type\()_rv40_qpel8_mc31_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        vswp            d0,  d1                 @ weights become (52, 20)
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8: horizontal (52, 20) shift 6, then vertical (20, 20) shift 5. */
function ff_\type\()_rv40_qpel8_mc12_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        vmov.i8         d0,  #20
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8: (20, 20) shift 5 in both directions. */
function ff_\type\()_rv40_qpel8_mc22_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8: horizontal (20, 52) shift 6, then vertical (20, 20) shift 5. */
function ff_\type\()_rv40_qpel8_mc32_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        vmov.i8         d1,  #20
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8, vertical only, weights mirrored with respect to mc01. */
function ff_\type\()_rv40_qpel8_mc03_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             r1,  r1,  r2,  lsl #1
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8 mc33: tail-call the pixels8_xy2 routine with r3 = 8. */
function ff_\type\()_rv40_qpel8_mc33_neon, export=1
        mov             r3,  #8
        b               X(ff_\type\()_pixels8_xy2_neon)
endfunc

/* 8x8: horizontal (52, 20) shift 6, then vertical (20, 52) shift 6. */
function ff_\type\()_rv40_qpel8_mc13_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        vswp            d0,  d1                 @ weights become (20, 52)
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/* 8x8: horizontal (20, 20) shift 5, then vertical (20, 52) shift 6. */
function ff_\type\()_rv40_qpel8_mc23_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        vmov.i8         d1,  #52
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/*
 * 16x16, horizontal only.  The shared tail at the local label filters
 * the left 8 columns for 16 rows, then rewinds dst by 16 rows, steps
 * 8 pixels right (the restored r1 plus 6 equals the original src - 2 + 8)
 * and tail-calls the helper for the right 8 columns.  mc30 enters at the
 * label with its own weights.
 */
function ff_\type\()_rv40_qpel16_mc10_neon, export=1
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
.L\type\()_rv40_qpel16_h:
        push            {r1, lr}
        sub             r1,  r1,  #2
        mov             r3,  #16
        bl              \type\()_rv40_qpel8_h_lowpass_neon
        pop             {r1, lr}
        sub             r0,  r0,  r2,  lsl #4
        add             r0,  r0,  #8
        add             r1,  r1,  #6
        mov             r3,  #16
        b               \type\()_rv40_qpel8_h_lowpass_neon
endfunc

/* 16x16, horizontal only, weights (20, 52). */
function ff_\type\()_rv40_qpel16_mc30_neon, export=1
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        b               .L\type\()_rv40_qpel16_h
endfunc

/*
 * 16x16, vertical only.  The shared tail at the local label runs the
 * 8x8 vertical helper four times: top-left, bottom-left, top-right,
 * bottom-right.  After each call r1 sits 12 rows further down, so it is
 * moved back 4 rows to start the lower block at row 8.  [sp, #64] is
 * the r1 pushed on entry (64 bytes of d8-d15 lie above sp).
 * mc03 enters at the label with its own weights.
 */
function ff_\type\()_rv40_qpel16_mc01_neon, export=1
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
.L\type\()_rv40_qpel16_v:
        sub             r1,  r1,  r2,  lsl #1
        push            {r1, lr}
        vpush           {d8-d15}
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        sub             r1,  r1,  r2,  lsl #2
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        ldr             r1,  [sp, #64]
        sub             r0,  r0,  r2,  lsl #4
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        sub             r1,  r1,  r2,  lsl #2
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r1, pc}
endfunc

/*
 * 16x16 two-dimensional cases (mc11 ... mc23 below) share one shape:
 *   - move src 2 rows up and 2 pixels left, push it together with lr,
 *   - reserve 44*8 bytes of stack and round the start up to a multiple
 *     of 8 (2 x 21 packed rows of 8 bytes plus alignment slack),
 *   - horizontal pass for columns 0-7 (r3 = 20 gives 21 rows), then for
 *     columns 8-15 starting from the saved r1 + 8; r12 keeps advancing,
 *     so the two packed column buffers are back to back.
 *     [sp, #416] is the pushed r1: 44*8 bytes of buffer plus 64 bytes
 *     of d8-d15,
 *   - vertical pass at the _v_s6 or _v_s5 local label: four 8x8 calls.
 *     Each call consumes 13 packed rows (104 bytes), so r1 is moved
 *     back 40 bytes to begin the lower block at packed row 8; after the
 *     lower-left block r1 already points at the second column buffer.
 */
function ff_\type\()_rv40_qpel16_mc11_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
.L\type\()_rv40_qpel16_v_s6:
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        sub             r1,  r1,  #40
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        sub             r0,  r0,  r2,  lsl #4
        add             r0,  r0,  #8
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        sub             r1,  r1,  #40
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #44*8
        vpop            {d8-d15}
        pop             {r1, pc}
endfunc

/* 16x16: horizontal (20, 20) shift 5, then vertical (52, 20) shift 6. */
function ff_\type\()_rv40_qpel16_mc21_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        vmov.i8         d0,  #52
        b               .L\type\()_rv40_qpel16_v_s6
endfunc

/* 16x16: horizontal (20, 52) shift 6, then vertical (52, 20) shift 6. */
function ff_\type\()_rv40_qpel16_mc31_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        vswp            d0,  d1                 @ weights become (52, 20)
        b               .L\type\()_rv40_qpel16_v_s6
endfunc

/*
 * 16x16: horizontal (52, 20) shift 6, then vertical (20, 20) shift 5.
 * Also hosts the shared shift-5 vertical tail at the _v_s5 local label.
 */
function ff_\type\()_rv40_qpel16_mc12_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        vmov.i8         d0,  #20
.L\type\()_rv40_qpel16_v_s5:
        add             r1,  sp,  #7
        bic             r1,  r1,  #7
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        sub             r1,  r1,  #40
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        sub             r0,  r0,  r2,  lsl #4
        add             r0,  r0,  #8
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        sub             r1,  r1,  #40
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        add             sp,  sp,  #44*8
        vpop            {d8-d15}
        pop             {r1, pc}
endfunc

/* 16x16: (20, 20) shift 5 in both directions. */
function ff_\type\()_rv40_qpel16_mc22_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        b               .L\type\()_rv40_qpel16_v_s5
endfunc

/* 16x16: horizontal (20, 52) shift 6, then vertical (20, 20) shift 5. */
function ff_\type\()_rv40_qpel16_mc32_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        vmov.i8         d1,  #20
        b               .L\type\()_rv40_qpel16_v_s5
endfunc

/* 16x16, vertical only, weights (20, 52). */
function ff_\type\()_rv40_qpel16_mc03_neon, export=1
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        b               .L\type\()_rv40_qpel16_v
endfunc

/* 16x16: horizontal (52, 20) shift 6, then vertical (20, 52) shift 6. */
function ff_\type\()_rv40_qpel16_mc13_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        vswp            d0,  d1                 @ weights become (20, 52)
        b               .L\type\()_rv40_qpel16_v_s6
endfunc

/* 16x16: horizontal (20, 20) shift 5, then vertical (20, 52) shift 6. */
function ff_\type\()_rv40_qpel16_mc23_neon, export=1
        sub             r1,  r1,  r2,  lsl #1
        sub             r1,  r1,  #2
        push            {r1, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7
        mov             r3,  #20
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        ldr             r1,  [sp, #416]
        add             r1,  r1,  #8
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        vmov.i8         d1,  #52
        b               .L\type\()_rv40_qpel16_v_s6
endfunc

/* 16x16 mc33: tail-call the pixels16_xy2 routine with r3 = 16. */
function ff_\type\()_rv40_qpel16_mc33_neon, export=1
        mov             r3,  #16
        b               X(ff_\type\()_pixels16_xy2_neon)
endfunc
.endm

        rv40_qpel       put
        rv40_qpel       avg

/*
 * rv40_weight: weighted blend of two groups of 16 pixels.
 *
 *   in:   d2, d3  16 bytes taken from src1
 *         d4, d5  16 bytes taken from src2
 *         d0      16-bit lane 0 = w1, 16-bit lane 2 = w2 (the low halves
 *                 of the two 32-bit words the callers move into d0)
 *   out:  d2, d3  for each pixel, the low 8 bits of
 *                 (((src1 * w2) >> 9) + ((src2 * w1) >> 9) + 16) >> 5
 *
 * Clobbers q2, q3 and q8-q15.
 */
.macro  rv40_weight
        vmovl.u8        q8,  d2
        vmovl.u8        q9,  d3
        vmovl.u8        q10, d4
        vmovl.u8        q11, d5
        vmull.u16       q2,  d16, d0[2]
        vmull.u16       q3,  d17, d0[2]
        vmull.u16       q8,  d18, d0[2]
        vmull.u16       q9,  d19, d0[2]
        vmull.u16       q12, d20, d0[0]
        vmull.u16       q13, d21, d0[0]
        vmull.u16       q14, d22, d0[0]
        vmull.u16       q15, d23, d0[0]
        vshrn.i32       d4,  q2,  #9
        vshrn.i32       d5,  q3,  #9
        vshrn.i32       d6,  q8,  #9
        vshrn.i32       d7,  q9,  #9
        vshrn.i32       d16, q12, #9
        vshrn.i32       d17, q13, #9
        vshrn.i32       d18, q14, #9
        vshrn.i32       d19, q15, #9
        vadd.u16        q2,  q2,  q8
        vadd.u16        q3,  q3,  q9
        vrshrn.i16      d2,  q2,  #5
        vrshrn.i16      d3,  q3,  #5
.endm

/* void ff_rv40_weight_func_16_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                    int w1, int w2, int stride) */
/*
 * r0 = dst, r1 = src1, r2 = src2, r3 = w1; w2 and stride are read from
 * [sp] and [sp, #4].  All three pointers are accessed with :128
 * alignment.  One 16-pixel row is blended per pass, 16 passes in total;
 * r3 is reused as the row counter once w1 has been moved into d0.
 */
function ff_rv40_weight_func_16_neon, export=1
        ldr             r12, [sp]
        vmov            d0,  r3,  r12
        ldr             r12, [sp, #4]
        mov             r3,  #16
1:
        vld1.8          {q1},     [r1,:128], r12
        vld1.8          {q2},     [r2,:128], r12
        rv40_weight
        vst1.8          {q1},     [r0,:128], r12
        subs            r3,  r3,  #1
        bne             1b
        bx              lr
endfunc

/* void ff_rv40_weight_func_8_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
                                   int w1, int w2, int stride) */
/*
 * Same register and stack arguments as the 16-pixel variant, with :64
 * alignment on all three pointers.  Two 8-pixel rows are loaded into one
 * Q register per source and blended together, so each pass handles two
 * rows; 8 rows in total.
 */
function ff_rv40_weight_func_8_neon, export=1
        ldr             r12, [sp]
        vmov            d0,  r3,  r12
        ldr             r12, [sp, #4]
        mov             r3,  #8
1:
        vld1.8          {d2},     [r1,:64], r12
        vld1.8          {d3},     [r1,:64], r12
        vld1.8          {d4},     [r2,:64], r12
        vld1.8          {d5},     [r2,:64], r12
        rv40_weight
        vst1.8          {d2},     [r0,:64], r12
        vst1.8          {d3},     [r0,:64], r12
        subs            r3,  r3,  #2
        bne             1b
        bx              lr
endfunc

function ff_rv40_h_loop_filter_strength_neon, export=1
        pkhbt           r2,  r3,  r2,  lsl #18

        ldr             r3,  [r0]
        ldr_dpre        r12, r0,  r1
        teq             r3,  r12
        beq             1f

        sub             r0,  r0,  r1,  lsl #1

        vld1.32         {d4[]},   [r0,:32], r1  @ -3
        vld1.32         {d0[]},   [r0,:32], r1  @ -2
        vld1.32         {d4[1]},  [r0,:32], r1  @ -1
        vld1.32         {d5[]},   [r0,:32], r1  @ 0
        vld1.32         {d1[]},   [r0,:32], r1  @ 1
        vld1.32         {d5[0]},  [r0,:32], r1  @ 2

        vpaddl.u8       q8,  q0                 @ -2, -2, -2, -2, 1, 1, 1, 1
        vpaddl.u8       q9,  q2                 @ -3, -3, -1, -1, 2, 2, 0, 0
        vdup.32         d30, r2                 @ beta2, beta << 2
        vpadd.u16       d16, d16, d17           @ -2, -2, 1, 1
        vpadd.u16       d18, d18, d19           @ -3, -1, 2, 0
        vabd.u16        d16, d18, d16
        vclt.u16        d16, d16, d30

        ldrd            r2,  r3,  [sp, #4]
        vmovl.u16       q12, d16
        vtrn.16         d16, d17
        vshr.u32        q12, q12, #15
        ldr             r0,  [sp]
        vst1.32         {d24[1]}, [r2,:32]
        vst1.32         {d25[1]}, [r3,:32]

        cmp             r0,  #0
        it              eq
        bxeq            lr

        vand            d18, d16, d17
        vtrn.32         d18, d19
        vand
d18, d18, d19 766cabdff1aSopenharmony_ci vmov.u16 r0, d18[0] 767cabdff1aSopenharmony_ci bx lr 768cabdff1aSopenharmony_ci1: 769cabdff1aSopenharmony_ci ldrd r2, r3, [sp, #4] 770cabdff1aSopenharmony_ci mov r0, #0 771cabdff1aSopenharmony_ci str r0, [r2] 772cabdff1aSopenharmony_ci str r0, [r3] 773cabdff1aSopenharmony_ci bx lr 774cabdff1aSopenharmony_ciendfunc 775cabdff1aSopenharmony_ci 776cabdff1aSopenharmony_cifunction ff_rv40_v_loop_filter_strength_neon, export=1 777cabdff1aSopenharmony_ci sub r0, r0, #3 778cabdff1aSopenharmony_ci pkhbt r2, r3, r2, lsl #18 779cabdff1aSopenharmony_ci 780cabdff1aSopenharmony_ci vld1.8 {d0}, [r0], r1 781cabdff1aSopenharmony_ci vld1.8 {d1}, [r0], r1 782cabdff1aSopenharmony_ci vld1.8 {d2}, [r0], r1 783cabdff1aSopenharmony_ci vld1.8 {d3}, [r0], r1 784cabdff1aSopenharmony_ci 785cabdff1aSopenharmony_ci vaddl.u8 q0, d0, d1 786cabdff1aSopenharmony_ci vaddl.u8 q1, d2, d3 787cabdff1aSopenharmony_ci vdup.32 q15, r2 788cabdff1aSopenharmony_ci vadd.u16 q0, q0, q1 @ -3, -2, -1, 0, 1, 2 789cabdff1aSopenharmony_ci vext.16 q1, q0, q0, #1 @ -2, -1, 0, 1, 2 790cabdff1aSopenharmony_ci vabd.u16 q0, q1, q0 791cabdff1aSopenharmony_ci vclt.u16 q0, q0, q15 792cabdff1aSopenharmony_ci 793cabdff1aSopenharmony_ci ldrd r2, r3, [sp, #4] 794cabdff1aSopenharmony_ci vmovl.u16 q1, d0 795cabdff1aSopenharmony_ci vext.16 d1, d0, d1, #3 796cabdff1aSopenharmony_ci vshr.u32 q1, q1, #15 797cabdff1aSopenharmony_ci ldr r0, [sp] 798cabdff1aSopenharmony_ci vst1.32 {d2[1]}, [r2,:32] 799cabdff1aSopenharmony_ci vst1.32 {d3[1]}, [r3,:32] 800cabdff1aSopenharmony_ci 801cabdff1aSopenharmony_ci cmp r0, #0 802cabdff1aSopenharmony_ci it eq 803cabdff1aSopenharmony_ci bxeq lr 804cabdff1aSopenharmony_ci 805cabdff1aSopenharmony_ci vand d0, d0, d1 806cabdff1aSopenharmony_ci vtrn.16 d0, d1 807cabdff1aSopenharmony_ci vand d0, d0, d1 808cabdff1aSopenharmony_ci vmov.u16 r0, d0[0] 809cabdff1aSopenharmony_ci bx lr 810cabdff1aSopenharmony_ciendfunc 811cabdff1aSopenharmony_ci 
812cabdff1aSopenharmony_ci.macro rv40_weak_loop_filter 813cabdff1aSopenharmony_ci vdup.16 d30, r2 @ filter_p1 814cabdff1aSopenharmony_ci vdup.16 d31, r3 @ filter_q1 815cabdff1aSopenharmony_ci ldrd r2, r3, [sp] 816cabdff1aSopenharmony_ci vdup.16 d28, r2 @ alpha 817cabdff1aSopenharmony_ci vdup.16 d29, r3 @ beta 818cabdff1aSopenharmony_ci ldr r12, [sp, #8] 819cabdff1aSopenharmony_ci vdup.16 d25, r12 @ lim_p0q0 820cabdff1aSopenharmony_ci ldrd r2, r3, [sp, #12] 821cabdff1aSopenharmony_ci vsubl.u8 q9, d5, d4 @ x, t 822cabdff1aSopenharmony_ci vabdl.u8 q8, d5, d4 @ x, abs(t) 823cabdff1aSopenharmony_ci vneg.s16 q15, q15 824cabdff1aSopenharmony_ci vceq.i16 d16, d19, #0 @ !t 825cabdff1aSopenharmony_ci vshl.s16 d19, d19, #2 @ t << 2 826cabdff1aSopenharmony_ci vmul.u16 d18, d17, d28 @ alpha * abs(t) 827cabdff1aSopenharmony_ci vand d24, d30, d31 @ filter_p1 & filter_q1 828cabdff1aSopenharmony_ci vsubl.u8 q1, d0, d4 @ p1p2, p1p0 829cabdff1aSopenharmony_ci vsubl.u8 q3, d1, d5 @ q1q2, q1q0 830cabdff1aSopenharmony_ci vmov.i16 d22, #3 831cabdff1aSopenharmony_ci vshr.u16 d18, d18, #7 832cabdff1aSopenharmony_ci vadd.i16 d22, d22, d24 @ 3 - (filter_p1 & filter_q1) 833cabdff1aSopenharmony_ci vsubl.u8 q10, d0, d1 @ src[-2] - src[1] 834cabdff1aSopenharmony_ci vcle.u16 d18, d18, d22 835cabdff1aSopenharmony_ci vand d20, d20, d24 836cabdff1aSopenharmony_ci vneg.s16 d23, d25 @ -lim_p0q0 837cabdff1aSopenharmony_ci vadd.s16 d19, d19, d20 838cabdff1aSopenharmony_ci vbic d16, d18, d16 @ t && u <= 3 - (fp1 & fq1) 839cabdff1aSopenharmony_ci vtrn.32 d4, d5 @ -3, 2, -1, 0 840cabdff1aSopenharmony_ci vrshr.s16 d19, d19, #3 841cabdff1aSopenharmony_ci vmov d28, d29 @ beta 842cabdff1aSopenharmony_ci vswp d3, d6 @ q1q2, p1p0 843cabdff1aSopenharmony_ci vmin.s16 d19, d19, d25 844cabdff1aSopenharmony_ci vand d30, d30, d16 845cabdff1aSopenharmony_ci vand d31, d31, d16 846cabdff1aSopenharmony_ci vadd.s16 q10, q1, q3 @ p1p2 + p1p0, q1q2 + q1q0 847cabdff1aSopenharmony_ci vmax.s16 d19, d19, d23 @ diff 
848cabdff1aSopenharmony_ci vabs.s16 q1, q1 @ abs(p1p2), abs(q1q2) 849cabdff1aSopenharmony_ci vand d18, d19, d16 @ diff 850cabdff1aSopenharmony_ci vcle.u16 q1, q1, q14 851cabdff1aSopenharmony_ci vneg.s16 d19, d18 @ -diff 852cabdff1aSopenharmony_ci vdup.16 d26, r3 @ lim_p1 853cabdff1aSopenharmony_ci vaddw.u8 q2, q9, d5 @ src[-1]+diff, src[0]-diff 854cabdff1aSopenharmony_ci vhsub.s16 q11, q10, q9 855cabdff1aSopenharmony_ci vand q1, q1, q15 856cabdff1aSopenharmony_ci vqmovun.s16 d4, q2 @ -1, 0 857cabdff1aSopenharmony_ci vand q9, q11, q1 858cabdff1aSopenharmony_ci vdup.16 d27, r2 @ lim_q1 859cabdff1aSopenharmony_ci vneg.s16 q9, q9 860cabdff1aSopenharmony_ci vneg.s16 q14, q13 861cabdff1aSopenharmony_ci vmin.s16 q9, q9, q13 862cabdff1aSopenharmony_ci vtrn.32 d0, d1 @ -2, 1, -2, 1 863cabdff1aSopenharmony_ci vmax.s16 q9, q9, q14 864cabdff1aSopenharmony_ci vaddw.u8 q3, q9, d0 865cabdff1aSopenharmony_ci vqmovun.s16 d5, q3 @ -2, 1 866cabdff1aSopenharmony_ci.endm 867cabdff1aSopenharmony_ci 868cabdff1aSopenharmony_cifunction ff_rv40_h_weak_loop_filter_neon, export=1 869cabdff1aSopenharmony_ci sub r0, r0, r1, lsl #1 870cabdff1aSopenharmony_ci sub r0, r0, r1 871cabdff1aSopenharmony_ci 872cabdff1aSopenharmony_ci vld1.32 {d4[]}, [r0,:32], r1 873cabdff1aSopenharmony_ci vld1.32 {d0[]}, [r0,:32], r1 874cabdff1aSopenharmony_ci vld1.32 {d4[1]}, [r0,:32], r1 875cabdff1aSopenharmony_ci vld1.32 {d5[]}, [r0,:32], r1 876cabdff1aSopenharmony_ci vld1.32 {d1[]}, [r0,:32], r1 877cabdff1aSopenharmony_ci vld1.32 {d5[0]}, [r0,:32] 878cabdff1aSopenharmony_ci 879cabdff1aSopenharmony_ci sub r0, r0, r1, lsl #2 880cabdff1aSopenharmony_ci 881cabdff1aSopenharmony_ci rv40_weak_loop_filter 882cabdff1aSopenharmony_ci 883cabdff1aSopenharmony_ci vst1.32 {d5[0]}, [r0,:32], r1 884cabdff1aSopenharmony_ci vst1.32 {d4[0]}, [r0,:32], r1 885cabdff1aSopenharmony_ci vst1.32 {d4[1]}, [r0,:32], r1 886cabdff1aSopenharmony_ci vst1.32 {d5[1]}, [r0,:32], r1 887cabdff1aSopenharmony_ci 888cabdff1aSopenharmony_ci bx lr 
889cabdff1aSopenharmony_ciendfunc 890cabdff1aSopenharmony_ci 891cabdff1aSopenharmony_cifunction ff_rv40_v_weak_loop_filter_neon, export=1 892cabdff1aSopenharmony_ci sub r12, r0, #3 893cabdff1aSopenharmony_ci sub r0, r0, #2 894cabdff1aSopenharmony_ci 895cabdff1aSopenharmony_ci vld1.8 {d4}, [r12], r1 896cabdff1aSopenharmony_ci vld1.8 {d5}, [r12], r1 897cabdff1aSopenharmony_ci vld1.8 {d2}, [r12], r1 898cabdff1aSopenharmony_ci vld1.8 {d3}, [r12], r1 899cabdff1aSopenharmony_ci 900cabdff1aSopenharmony_ci vtrn.16 q2, q1 901cabdff1aSopenharmony_ci vtrn.8 d4, d5 902cabdff1aSopenharmony_ci vtrn.8 d2, d3 903cabdff1aSopenharmony_ci 904cabdff1aSopenharmony_ci vrev64.32 d5, d5 905cabdff1aSopenharmony_ci vtrn.32 q2, q1 906cabdff1aSopenharmony_ci vdup.32 d0, d3[0] 907cabdff1aSopenharmony_ci vdup.32 d1, d2[0] 908cabdff1aSopenharmony_ci 909cabdff1aSopenharmony_ci rv40_weak_loop_filter 910cabdff1aSopenharmony_ci 911cabdff1aSopenharmony_ci vtrn.32 q2, q3 912cabdff1aSopenharmony_ci vswp d4, d5 913cabdff1aSopenharmony_ci 914cabdff1aSopenharmony_ci vst4.8 {d4[0],d5[0],d6[0],d7[0]}, [r0], r1 915cabdff1aSopenharmony_ci vst4.8 {d4[1],d5[1],d6[1],d7[1]}, [r0], r1 916cabdff1aSopenharmony_ci vst4.8 {d4[2],d5[2],d6[2],d7[2]}, [r0], r1 917cabdff1aSopenharmony_ci vst4.8 {d4[3],d5[3],d6[3],d7[3]}, [r0], r1 918cabdff1aSopenharmony_ci 919cabdff1aSopenharmony_ci bx lr 920cabdff1aSopenharmony_ciendfunc 921