/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"
#include "neon.S"

        /* H.264 qpel MC */

/*
 * lowpass_const r
 *
 * Load the two filter coefficients used by lowpass_8 / lowpass_8_1 into d6:
 * 16-bit lane d6[0] = 5 and 16-bit lane d6[1] = 20.
 * Clobbers the core register passed as r and the low 32 bits of d6.
 */
.macro  lowpass_const   r
        movw            \r,  #5
        movt            \r,  #20                @ r = (20 << 16) | 5
        vmov.32         d6[0], \r
.endm

/*
 * lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
 *
 * Apply the 6-tap filter to two independent rows of bytes.  Row A is the
 * 16-byte sequence r0:r1 and row B is r2:r3 (all four are d registers).
 * For each of the 8 outputs i of a row x this computes, in 16-bit lanes,
 *     x[i] + x[i+5] + 20*(x[i+2] + x[i+3]) - 5*(x[i+1] + x[i+4])
 *
 * narrow=1: d0/d1 are d registers receiving the sums rounded, shifted right
 *           by 5 and saturated to unsigned 8 bit; q0 and q8 are used as the
 *           16-bit accumulators.
 * narrow=0: d0/d1 are q registers receiving the raw 16-bit sums.
 *
 * Requires d6 as set up by lowpass_const.
 * Clobbers q1, q2, q9, q10, q15 (and q0, q8 when narrow=1).
 */
.macro  lowpass_8       r0,  r1,  r2,  r3,  d0,  d1,  narrow=1
  .if \narrow
        t0 .req q0
        t1 .req q8
  .else
        t0 .req \d0
        t1 .req \d1
  .endif
        vext.8          d2,  \r0, \r1, #2
        vext.8          d3,  \r0, \r1, #3
        vaddl.u8        q1,  d2,  d3            @ q1 = x[i+2] + x[i+3]
        vext.8          d4,  \r0, \r1, #1
        vext.8          d5,  \r0, \r1, #4
        vaddl.u8        q2,  d4,  d5            @ q2 = x[i+1] + x[i+4]
        vext.8          d30, \r0, \r1, #5
        vaddl.u8        t0,  \r0, d30           @ t0 = x[i] + x[i+5]
        vext.8          d18, \r2, \r3, #2
        vmla.i16        t0,  q1,  d6[1]         @ t0 += 20 * q1
        vext.8          d19, \r2, \r3, #3
        vaddl.u8        q9,  d18, d19
        vext.8          d20, \r2, \r3, #1
        vmls.i16        t0,  q2,  d6[0]         @ t0 -= 5 * q2
        vext.8          d21, \r2, \r3, #4
        vaddl.u8        q10, d20, d21
        vext.8          d31, \r2, \r3, #5
        vaddl.u8        t1,  \r2, d31
        vmla.i16        t1,  q9,  d6[1]
        vmls.i16        t1,  q10, d6[0]
  .if \narrow
        vqrshrun.s16    \d0, t0,  #5
        vqrshrun.s16    \d1, t1,  #5
  .endif
        .unreq  t0
        .unreq  t1
.endm

/*
 * lowpass_8_1 r0, r1, d0, narrow=1
 *
 * Single-row form of lowpass_8: filters the row r0:r1 only.
 * narrow=1 writes the rounded, >>5, saturated bytes to the d register d0
 * (using q0 as accumulator); narrow=0 leaves the 16-bit sums in the
 * q register d0.
 * Requires d6 as set up by lowpass_const.  Clobbers q1, q2, d30
 * (and q0 when narrow=1).
 */
.macro  lowpass_8_1     r0,  r1,  d0,  narrow=1
  .if \narrow
        t0 .req q0
  .else
        t0 .req \d0
  .endif
        vext.8          d2,  \r0, \r1, #2
        vext.8          d3,  \r0, \r1, #3
        vaddl.u8        q1,  d2,  d3
        vext.8          d4,  \r0, \r1, #1
        vext.8          d5,  \r0, \r1, #4
        vaddl.u8        q2,  d4,  d5
        vext.8          d30, \r0, \r1, #5
        vaddl.u8        t0,  \r0, d30
        vmla.i16        t0,  q1,  d6[1]
        vmls.i16        t0,  q2,  d6[0]
  .if \narrow
        vqrshrun.s16    \d0, t0,  #5
  .endif
        .unreq  t0
.endm

/*
 * lowpass_8.16 r0, r1, l0, h0, l1, h1, d
 *
 * Same 6-tap filter applied to signed 16-bit samples with 32-bit
 * accumulation.  r0:r1 are q registers holding 16 consecutive samples;
 * l0/h0 and l1/h1 are the low/high d halves of r0 and r1 as passed by the
 * callers in this file.  r1 is overwritten with the samples shifted by 5
 * before l1/h1 are read, so l0+l1 / h0+h1 form x[i] + x[i+5].
 * The factors 20 and 5 are built from shifts (16x + 4x, x + 4x).
 * The result is rounded, shifted right by 10, and saturated to 8 unsigned
 * bytes in the d register d.
 *
 * Clobbers q0-q3, q8-q10, q15 and r1.  Note that q3 contains d6, so the
 * coefficients loaded by lowpass_const do not survive this macro.
 */
.macro  lowpass_8.16    r0,  r1,  l0,  h0,  l1,  h1,  d
        vext.16         q1,  \r0, \r1, #2
        vext.16         q0,  \r0, \r1, #3
        vaddl.s16       q9,  d2,  d0            @ low  half: x[i+2] + x[i+3]
        vext.16         q2,  \r0, \r1, #1
        vaddl.s16       q1,  d3,  d1            @ high half: x[i+2] + x[i+3]
        vext.16         q3,  \r0, \r1, #4
        vaddl.s16       q10, d4,  d6            @ low  half: x[i+1] + x[i+4]
        vext.16         \r1, \r0, \r1, #5
        vaddl.s16       q2,  d5,  d7            @ high half: x[i+1] + x[i+4]
        vaddl.s16       q0,  \h0, \h1           @ high half: x[i] + x[i+5]
        vaddl.s16       q8,  \l0, \l1           @ low  half: x[i] + x[i+5]

        vshl.i32        q3,  q9,  #4
        vshl.i32        q9,  q9,  #2
        vshl.i32        q15, q10, #2
        vadd.i32        q9,  q9,  q3            @ q9  *= 20
        vadd.i32        q10, q10, q15           @ q10 *= 5

        vshl.i32        q3,  q1,  #4
        vshl.i32        q1,  q1,  #2
        vshl.i32        q15, q2,  #2
        vadd.i32        q1,  q1,  q3            @ q1 *= 20
        vadd.i32        q2,  q2,  q15           @ q2 *= 5

        vadd.i32        q9,  q9,  q8
        vsub.i32        q9,  q9,  q10

        vadd.i32        q1,  q1,  q0
        vsub.i32        q1,  q1,  q2

        vrshrn.s32      d18, q9,  #10
        vrshrn.s32      d19, q1,  #10

        vqmovun.s16     \d,  q9
.endm

/*
 * put_h264_qpel16_h_lowpass_neon_packed
 *
 * Horizontal filter of a 16-pixel-wide, 16-row block done as two 8-wide
 * passes through put_h264_qpel8_h_lowpass_neon.
 * In:  r0 = destination, r1 = source, r2 = source stride.
 * The destination stride is forced to 8 and r0 is not rewound between the
 * passes, so the second 8-wide half is written right after the first one.
 * Clobbers r3, r4 (holds the return address), r12 and whatever the 8-wide
 * routine clobbers.  Returns through the tail call.
 */
function put_h264_qpel16_h_lowpass_neon_packed
        mov             r4,  lr
        mov             r12, #16                @ 16 rows
        mov             r3,  #8                 @ packed destination stride
        bl              put_h264_qpel8_h_lowpass_neon
        sub             r1,  r1,  r2, lsl #4    @ back to the first source row
        add             r1,  r1,  #8            @ right 8 columns
        mov             r12, #16
        mov             lr,  r4
        b               put_h264_qpel8_h_lowpass_neon
endfunc

/*
 * h264_qpel_h_lowpass type            (type = put or avg)
 *
 * Emits <type>_h264_qpel16_h_lowpass_neon and
 * <type>_h264_qpel8_h_lowpass_neon.
 *
 * Register use of the 8-wide routine:
 *   r0  = destination (accessed with a 64-bit alignment hint)
 *   r1  = source, r2 = source stride
 *   r3  = destination stride
 *   r12 = number of rows, consumed two at a time
 *   d6  = coefficients from lowpass_const
 * For type=avg the filtered rows are combined with the existing destination
 * bytes using a rounding halving add before being stored.
 *
 * The 16-wide routine processes the left 8 columns with a call, repositions
 * r0/r1 to the top of the right 8 columns and has no return instruction of
 * its own: it relies on execution continuing into the 8-wide routine that
 * is emitted directly after it (NOTE(review): this assumes function/endfunc
 * from asm.S place nothing executable in between -- confirm).
 */
.macro  h264_qpel_h_lowpass type
function \type\()_h264_qpel16_h_lowpass_neon
        push            {lr}
        mov             r12, #16
        bl              \type\()_h264_qpel8_h_lowpass_neon
        sub             r0,  r0,  r3, lsl #4
        sub             r1,  r1,  r2, lsl #4
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        mov             r12, #16
        pop             {lr}
endfunc

function \type\()_h264_qpel8_h_lowpass_neon
1:      vld1.8          {d0, d1},  [r1], r2
        vld1.8          {d16,d17}, [r1], r2
        subs            r12, r12, #2            @ flags are consumed by bne below
        lowpass_8       d0,  d1,  d16, d17, d0,  d16
  .ifc \type,avg
        vld1.8          {d2},     [r0,:64], r3
        vld1.8          {d3},     [r0,:64]
        vrhadd.u8       d0,  d0,  d2
        vrhadd.u8       d16, d16, d3
        sub             r0,  r0,  r3            @ undo the advance of the first load
  .endif
        vst1.8          {d0},     [r0,:64], r3
        vst1.8          {d16},    [r0,:64], r3
        bne             1b
        bx              lr
endfunc
.endm

h264_qpel_h_lowpass put 172cabdff1aSopenharmony_ci h264_qpel_h_lowpass avg 173cabdff1aSopenharmony_ci 174cabdff1aSopenharmony_ci.macro h264_qpel_h_lowpass_l2 type 175cabdff1aSopenharmony_cifunction \type\()_h264_qpel16_h_lowpass_l2_neon 176cabdff1aSopenharmony_ci push {lr} 177cabdff1aSopenharmony_ci mov r12, #16 178cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_h_lowpass_l2_neon 179cabdff1aSopenharmony_ci sub r0, r0, r2, lsl #4 180cabdff1aSopenharmony_ci sub r1, r1, r2, lsl #4 181cabdff1aSopenharmony_ci sub r3, r3, r2, lsl #4 182cabdff1aSopenharmony_ci add r0, r0, #8 183cabdff1aSopenharmony_ci add r1, r1, #8 184cabdff1aSopenharmony_ci add r3, r3, #8 185cabdff1aSopenharmony_ci mov r12, #16 186cabdff1aSopenharmony_ci pop {lr} 187cabdff1aSopenharmony_ciendfunc 188cabdff1aSopenharmony_ci 189cabdff1aSopenharmony_cifunction \type\()_h264_qpel8_h_lowpass_l2_neon 190cabdff1aSopenharmony_ci1: vld1.8 {d0, d1}, [r1], r2 191cabdff1aSopenharmony_ci vld1.8 {d16,d17}, [r1], r2 192cabdff1aSopenharmony_ci vld1.8 {d28}, [r3], r2 193cabdff1aSopenharmony_ci vld1.8 {d29}, [r3], r2 194cabdff1aSopenharmony_ci subs r12, r12, #2 195cabdff1aSopenharmony_ci lowpass_8 d0, d1, d16, d17, d0, d1 196cabdff1aSopenharmony_ci vrhadd.u8 q0, q0, q14 197cabdff1aSopenharmony_ci .ifc \type,avg 198cabdff1aSopenharmony_ci vld1.8 {d2}, [r0,:64], r2 199cabdff1aSopenharmony_ci vld1.8 {d3}, [r0,:64] 200cabdff1aSopenharmony_ci vrhadd.u8 q0, q0, q1 201cabdff1aSopenharmony_ci sub r0, r0, r2 202cabdff1aSopenharmony_ci .endif 203cabdff1aSopenharmony_ci vst1.8 {d0}, [r0,:64], r2 204cabdff1aSopenharmony_ci vst1.8 {d1}, [r0,:64], r2 205cabdff1aSopenharmony_ci bne 1b 206cabdff1aSopenharmony_ci bx lr 207cabdff1aSopenharmony_ciendfunc 208cabdff1aSopenharmony_ci.endm 209cabdff1aSopenharmony_ci 210cabdff1aSopenharmony_ci h264_qpel_h_lowpass_l2 put 211cabdff1aSopenharmony_ci h264_qpel_h_lowpass_l2 avg 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_cifunction put_h264_qpel16_v_lowpass_neon_packed 
214cabdff1aSopenharmony_ci mov r4, lr 215cabdff1aSopenharmony_ci mov r2, #8 216cabdff1aSopenharmony_ci bl put_h264_qpel8_v_lowpass_neon 217cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 218cabdff1aSopenharmony_ci bl put_h264_qpel8_v_lowpass_neon 219cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #4 220cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 221cabdff1aSopenharmony_ci add r1, r1, #8 222cabdff1aSopenharmony_ci bl put_h264_qpel8_v_lowpass_neon 223cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 224cabdff1aSopenharmony_ci mov lr, r4 225cabdff1aSopenharmony_ci b put_h264_qpel8_v_lowpass_neon 226cabdff1aSopenharmony_ciendfunc 227cabdff1aSopenharmony_ci 228cabdff1aSopenharmony_ci.macro h264_qpel_v_lowpass type 229cabdff1aSopenharmony_cifunction \type\()_h264_qpel16_v_lowpass_neon 230cabdff1aSopenharmony_ci mov r4, lr 231cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_neon 232cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 233cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_neon 234cabdff1aSopenharmony_ci sub r0, r0, r2, lsl #4 235cabdff1aSopenharmony_ci add r0, r0, #8 236cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #4 237cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 238cabdff1aSopenharmony_ci add r1, r1, #8 239cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_neon 240cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 241cabdff1aSopenharmony_ci mov lr, r4 242cabdff1aSopenharmony_ciendfunc 243cabdff1aSopenharmony_ci 244cabdff1aSopenharmony_cifunction \type\()_h264_qpel8_v_lowpass_neon 245cabdff1aSopenharmony_ci vld1.8 {d8}, [r1], r3 246cabdff1aSopenharmony_ci vld1.8 {d10}, [r1], r3 247cabdff1aSopenharmony_ci vld1.8 {d12}, [r1], r3 248cabdff1aSopenharmony_ci vld1.8 {d14}, [r1], r3 249cabdff1aSopenharmony_ci vld1.8 {d22}, [r1], r3 250cabdff1aSopenharmony_ci vld1.8 {d24}, [r1], r3 251cabdff1aSopenharmony_ci vld1.8 {d26}, [r1], r3 252cabdff1aSopenharmony_ci vld1.8 {d28}, [r1], r3 253cabdff1aSopenharmony_ci vld1.8 {d9}, [r1], r3 254cabdff1aSopenharmony_ci 
vld1.8 {d11}, [r1], r3 255cabdff1aSopenharmony_ci vld1.8 {d13}, [r1], r3 256cabdff1aSopenharmony_ci vld1.8 {d15}, [r1], r3 257cabdff1aSopenharmony_ci vld1.8 {d23}, [r1] 258cabdff1aSopenharmony_ci 259cabdff1aSopenharmony_ci transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14 260cabdff1aSopenharmony_ci lowpass_8 d8, d9, d10, d11, d8, d10 261cabdff1aSopenharmony_ci lowpass_8 d12, d13, d14, d15, d12, d14 262cabdff1aSopenharmony_ci lowpass_8 d22, d23, d24, d25, d22, d24 263cabdff1aSopenharmony_ci lowpass_8 d26, d27, d28, d29, d26, d28 264cabdff1aSopenharmony_ci transpose_8x8 d8, d10, d12, d14, d22, d24, d26, d28 265cabdff1aSopenharmony_ci 266cabdff1aSopenharmony_ci .ifc \type,avg 267cabdff1aSopenharmony_ci vld1.8 {d9}, [r0,:64], r2 268cabdff1aSopenharmony_ci vld1.8 {d11}, [r0,:64], r2 269cabdff1aSopenharmony_ci vld1.8 {d13}, [r0,:64], r2 270cabdff1aSopenharmony_ci vrhadd.u8 d8, d8, d9 271cabdff1aSopenharmony_ci vld1.8 {d15}, [r0,:64], r2 272cabdff1aSopenharmony_ci vrhadd.u8 d10, d10, d11 273cabdff1aSopenharmony_ci vld1.8 {d23}, [r0,:64], r2 274cabdff1aSopenharmony_ci vrhadd.u8 d12, d12, d13 275cabdff1aSopenharmony_ci vld1.8 {d25}, [r0,:64], r2 276cabdff1aSopenharmony_ci vrhadd.u8 d14, d14, d15 277cabdff1aSopenharmony_ci vld1.8 {d27}, [r0,:64], r2 278cabdff1aSopenharmony_ci vrhadd.u8 d22, d22, d23 279cabdff1aSopenharmony_ci vld1.8 {d29}, [r0,:64], r2 280cabdff1aSopenharmony_ci vrhadd.u8 d24, d24, d25 281cabdff1aSopenharmony_ci vrhadd.u8 d26, d26, d27 282cabdff1aSopenharmony_ci vrhadd.u8 d28, d28, d29 283cabdff1aSopenharmony_ci sub r0, r0, r2, lsl #3 284cabdff1aSopenharmony_ci .endif 285cabdff1aSopenharmony_ci 286cabdff1aSopenharmony_ci vst1.8 {d8}, [r0,:64], r2 287cabdff1aSopenharmony_ci vst1.8 {d10}, [r0,:64], r2 288cabdff1aSopenharmony_ci vst1.8 {d12}, [r0,:64], r2 289cabdff1aSopenharmony_ci vst1.8 {d14}, [r0,:64], r2 290cabdff1aSopenharmony_ci vst1.8 {d22}, [r0,:64], r2 291cabdff1aSopenharmony_ci vst1.8 {d24}, [r0,:64], r2 292cabdff1aSopenharmony_ci vst1.8 {d26}, 
[r0,:64], r2 293cabdff1aSopenharmony_ci vst1.8 {d28}, [r0,:64], r2 294cabdff1aSopenharmony_ci 295cabdff1aSopenharmony_ci bx lr 296cabdff1aSopenharmony_ciendfunc 297cabdff1aSopenharmony_ci.endm 298cabdff1aSopenharmony_ci 299cabdff1aSopenharmony_ci h264_qpel_v_lowpass put 300cabdff1aSopenharmony_ci h264_qpel_v_lowpass avg 301cabdff1aSopenharmony_ci 302cabdff1aSopenharmony_ci.macro h264_qpel_v_lowpass_l2 type 303cabdff1aSopenharmony_cifunction \type\()_h264_qpel16_v_lowpass_l2_neon 304cabdff1aSopenharmony_ci mov r4, lr 305cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_l2_neon 306cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 307cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_l2_neon 308cabdff1aSopenharmony_ci sub r0, r0, r3, lsl #4 309cabdff1aSopenharmony_ci sub r12, r12, r2, lsl #4 310cabdff1aSopenharmony_ci add r0, r0, #8 311cabdff1aSopenharmony_ci add r12, r12, #8 312cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #4 313cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 314cabdff1aSopenharmony_ci add r1, r1, #8 315cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_l2_neon 316cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 317cabdff1aSopenharmony_ci mov lr, r4 318cabdff1aSopenharmony_ciendfunc 319cabdff1aSopenharmony_ci 320cabdff1aSopenharmony_cifunction \type\()_h264_qpel8_v_lowpass_l2_neon 321cabdff1aSopenharmony_ci vld1.8 {d8}, [r1], r3 322cabdff1aSopenharmony_ci vld1.8 {d10}, [r1], r3 323cabdff1aSopenharmony_ci vld1.8 {d12}, [r1], r3 324cabdff1aSopenharmony_ci vld1.8 {d14}, [r1], r3 325cabdff1aSopenharmony_ci vld1.8 {d22}, [r1], r3 326cabdff1aSopenharmony_ci vld1.8 {d24}, [r1], r3 327cabdff1aSopenharmony_ci vld1.8 {d26}, [r1], r3 328cabdff1aSopenharmony_ci vld1.8 {d28}, [r1], r3 329cabdff1aSopenharmony_ci vld1.8 {d9}, [r1], r3 330cabdff1aSopenharmony_ci vld1.8 {d11}, [r1], r3 331cabdff1aSopenharmony_ci vld1.8 {d13}, [r1], r3 332cabdff1aSopenharmony_ci vld1.8 {d15}, [r1], r3 333cabdff1aSopenharmony_ci vld1.8 {d23}, [r1] 334cabdff1aSopenharmony_ci 
335cabdff1aSopenharmony_ci transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14 336cabdff1aSopenharmony_ci lowpass_8 d8, d9, d10, d11, d8, d9 337cabdff1aSopenharmony_ci lowpass_8 d12, d13, d14, d15, d12, d13 338cabdff1aSopenharmony_ci lowpass_8 d22, d23, d24, d25, d22, d23 339cabdff1aSopenharmony_ci lowpass_8 d26, d27, d28, d29, d26, d27 340cabdff1aSopenharmony_ci transpose_8x8 d8, d9, d12, d13, d22, d23, d26, d27 341cabdff1aSopenharmony_ci 342cabdff1aSopenharmony_ci vld1.8 {d0}, [r12], r2 343cabdff1aSopenharmony_ci vld1.8 {d1}, [r12], r2 344cabdff1aSopenharmony_ci vld1.8 {d2}, [r12], r2 345cabdff1aSopenharmony_ci vld1.8 {d3}, [r12], r2 346cabdff1aSopenharmony_ci vld1.8 {d4}, [r12], r2 347cabdff1aSopenharmony_ci vrhadd.u8 q0, q0, q4 348cabdff1aSopenharmony_ci vld1.8 {d5}, [r12], r2 349cabdff1aSopenharmony_ci vrhadd.u8 q1, q1, q6 350cabdff1aSopenharmony_ci vld1.8 {d10}, [r12], r2 351cabdff1aSopenharmony_ci vrhadd.u8 q2, q2, q11 352cabdff1aSopenharmony_ci vld1.8 {d11}, [r12], r2 353cabdff1aSopenharmony_ci vrhadd.u8 q5, q5, q13 354cabdff1aSopenharmony_ci 355cabdff1aSopenharmony_ci .ifc \type,avg 356cabdff1aSopenharmony_ci vld1.8 {d16}, [r0,:64], r3 357cabdff1aSopenharmony_ci vld1.8 {d17}, [r0,:64], r3 358cabdff1aSopenharmony_ci vrhadd.u8 d0, d0, d16 359cabdff1aSopenharmony_ci vld1.8 {d16}, [r0,:64], r3 360cabdff1aSopenharmony_ci vrhadd.u8 d1, d1, d17 361cabdff1aSopenharmony_ci vld1.8 {d17}, [r0,:64], r3 362cabdff1aSopenharmony_ci vrhadd.u8 d2, d2, d16 363cabdff1aSopenharmony_ci vld1.8 {d16}, [r0,:64], r3 364cabdff1aSopenharmony_ci vrhadd.u8 d3, d3, d17 365cabdff1aSopenharmony_ci vld1.8 {d17}, [r0,:64], r3 366cabdff1aSopenharmony_ci vrhadd.u8 d4, d4, d16 367cabdff1aSopenharmony_ci vld1.8 {d16}, [r0,:64], r3 368cabdff1aSopenharmony_ci vrhadd.u8 d5, d5, d17 369cabdff1aSopenharmony_ci vld1.8 {d17}, [r0,:64], r3 370cabdff1aSopenharmony_ci vrhadd.u8 d10, d10, d16 371cabdff1aSopenharmony_ci vrhadd.u8 d11, d11, d17 372cabdff1aSopenharmony_ci sub r0, r0, r3, lsl #3 
373cabdff1aSopenharmony_ci .endif 374cabdff1aSopenharmony_ci 375cabdff1aSopenharmony_ci vst1.8 {d0}, [r0,:64], r3 376cabdff1aSopenharmony_ci vst1.8 {d1}, [r0,:64], r3 377cabdff1aSopenharmony_ci vst1.8 {d2}, [r0,:64], r3 378cabdff1aSopenharmony_ci vst1.8 {d3}, [r0,:64], r3 379cabdff1aSopenharmony_ci vst1.8 {d4}, [r0,:64], r3 380cabdff1aSopenharmony_ci vst1.8 {d5}, [r0,:64], r3 381cabdff1aSopenharmony_ci vst1.8 {d10}, [r0,:64], r3 382cabdff1aSopenharmony_ci vst1.8 {d11}, [r0,:64], r3 383cabdff1aSopenharmony_ci 384cabdff1aSopenharmony_ci bx lr 385cabdff1aSopenharmony_ciendfunc 386cabdff1aSopenharmony_ci.endm 387cabdff1aSopenharmony_ci 388cabdff1aSopenharmony_ci h264_qpel_v_lowpass_l2 put 389cabdff1aSopenharmony_ci h264_qpel_v_lowpass_l2 avg 390cabdff1aSopenharmony_ci 391cabdff1aSopenharmony_cifunction put_h264_qpel8_hv_lowpass_neon_top 392cabdff1aSopenharmony_ci lowpass_const r12 393cabdff1aSopenharmony_ci mov r12, #12 394cabdff1aSopenharmony_ci1: vld1.8 {d0, d1}, [r1], r3 395cabdff1aSopenharmony_ci vld1.8 {d16,d17}, [r1], r3 396cabdff1aSopenharmony_ci subs r12, r12, #2 397cabdff1aSopenharmony_ci lowpass_8 d0, d1, d16, d17, q11, q12, narrow=0 398cabdff1aSopenharmony_ci vst1.8 {d22-d25}, [r4,:128]! 
399cabdff1aSopenharmony_ci bne 1b 400cabdff1aSopenharmony_ci 401cabdff1aSopenharmony_ci vld1.8 {d0, d1}, [r1] 402cabdff1aSopenharmony_ci lowpass_8_1 d0, d1, q12, narrow=0 403cabdff1aSopenharmony_ci 404cabdff1aSopenharmony_ci mov r12, #-16 405cabdff1aSopenharmony_ci add r4, r4, r12 406cabdff1aSopenharmony_ci vld1.8 {d30,d31}, [r4,:128], r12 407cabdff1aSopenharmony_ci vld1.8 {d20,d21}, [r4,:128], r12 408cabdff1aSopenharmony_ci vld1.8 {d18,d19}, [r4,:128], r12 409cabdff1aSopenharmony_ci vld1.8 {d16,d17}, [r4,:128], r12 410cabdff1aSopenharmony_ci vld1.8 {d14,d15}, [r4,:128], r12 411cabdff1aSopenharmony_ci vld1.8 {d12,d13}, [r4,:128], r12 412cabdff1aSopenharmony_ci vld1.8 {d10,d11}, [r4,:128], r12 413cabdff1aSopenharmony_ci vld1.8 {d8, d9}, [r4,:128], r12 414cabdff1aSopenharmony_ci vld1.8 {d6, d7}, [r4,:128], r12 415cabdff1aSopenharmony_ci vld1.8 {d4, d5}, [r4,:128], r12 416cabdff1aSopenharmony_ci vld1.8 {d2, d3}, [r4,:128], r12 417cabdff1aSopenharmony_ci vld1.8 {d0, d1}, [r4,:128] 418cabdff1aSopenharmony_ci 419cabdff1aSopenharmony_ci swap4 d1, d3, d5, d7, d8, d10, d12, d14 420cabdff1aSopenharmony_ci transpose16_4x4 q0, q1, q2, q3, q4, q5, q6, q7 421cabdff1aSopenharmony_ci 422cabdff1aSopenharmony_ci swap4 d17, d19, d21, d31, d24, d26, d28, d22 423cabdff1aSopenharmony_ci transpose16_4x4 q8, q9, q10, q15, q12, q13, q14, q11 424cabdff1aSopenharmony_ci 425cabdff1aSopenharmony_ci vst1.8 {d30,d31}, [r4,:128]! 426cabdff1aSopenharmony_ci vst1.8 {d6, d7}, [r4,:128]! 427cabdff1aSopenharmony_ci vst1.8 {d20,d21}, [r4,:128]! 428cabdff1aSopenharmony_ci vst1.8 {d4, d5}, [r4,:128]! 429cabdff1aSopenharmony_ci vst1.8 {d18,d19}, [r4,:128]! 430cabdff1aSopenharmony_ci vst1.8 {d2, d3}, [r4,:128]! 431cabdff1aSopenharmony_ci vst1.8 {d16,d17}, [r4,:128]! 
432cabdff1aSopenharmony_ci vst1.8 {d0, d1}, [r4,:128] 433cabdff1aSopenharmony_ci 434cabdff1aSopenharmony_ci lowpass_8.16 q4, q12, d8, d9, d24, d25, d8 435cabdff1aSopenharmony_ci lowpass_8.16 q5, q13, d10, d11, d26, d27, d9 436cabdff1aSopenharmony_ci lowpass_8.16 q6, q14, d12, d13, d28, d29, d10 437cabdff1aSopenharmony_ci lowpass_8.16 q7, q11, d14, d15, d22, d23, d11 438cabdff1aSopenharmony_ci 439cabdff1aSopenharmony_ci vld1.8 {d16,d17}, [r4,:128], r12 440cabdff1aSopenharmony_ci vld1.8 {d30,d31}, [r4,:128], r12 441cabdff1aSopenharmony_ci lowpass_8.16 q8, q15, d16, d17, d30, d31, d12 442cabdff1aSopenharmony_ci vld1.8 {d16,d17}, [r4,:128], r12 443cabdff1aSopenharmony_ci vld1.8 {d30,d31}, [r4,:128], r12 444cabdff1aSopenharmony_ci lowpass_8.16 q8, q15, d16, d17, d30, d31, d13 445cabdff1aSopenharmony_ci vld1.8 {d16,d17}, [r4,:128], r12 446cabdff1aSopenharmony_ci vld1.8 {d30,d31}, [r4,:128], r12 447cabdff1aSopenharmony_ci lowpass_8.16 q8, q15, d16, d17, d30, d31, d14 448cabdff1aSopenharmony_ci vld1.8 {d16,d17}, [r4,:128], r12 449cabdff1aSopenharmony_ci vld1.8 {d30,d31}, [r4,:128] 450cabdff1aSopenharmony_ci lowpass_8.16 q8, q15, d16, d17, d30, d31, d15 451cabdff1aSopenharmony_ci 452cabdff1aSopenharmony_ci transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11 453cabdff1aSopenharmony_ci 454cabdff1aSopenharmony_ci bx lr 455cabdff1aSopenharmony_ciendfunc 456cabdff1aSopenharmony_ci 457cabdff1aSopenharmony_ci.macro h264_qpel8_hv_lowpass type 458cabdff1aSopenharmony_cifunction \type\()_h264_qpel8_hv_lowpass_neon 459cabdff1aSopenharmony_ci mov r10, lr 460cabdff1aSopenharmony_ci bl put_h264_qpel8_hv_lowpass_neon_top 461cabdff1aSopenharmony_ci .ifc \type,avg 462cabdff1aSopenharmony_ci vld1.8 {d0}, [r0,:64], r2 463cabdff1aSopenharmony_ci vld1.8 {d1}, [r0,:64], r2 464cabdff1aSopenharmony_ci vld1.8 {d2}, [r0,:64], r2 465cabdff1aSopenharmony_ci vld1.8 {d3}, [r0,:64], r2 466cabdff1aSopenharmony_ci vrhadd.u8 q6, q6, q0 467cabdff1aSopenharmony_ci vld1.8 {d4}, [r0,:64], r2 
468cabdff1aSopenharmony_ci vld1.8 {d5}, [r0,:64], r2 469cabdff1aSopenharmony_ci vrhadd.u8 q7, q7, q1 470cabdff1aSopenharmony_ci vld1.8 {d6}, [r0,:64], r2 471cabdff1aSopenharmony_ci vld1.8 {d7}, [r0,:64], r2 472cabdff1aSopenharmony_ci vrhadd.u8 q4, q4, q2 473cabdff1aSopenharmony_ci vrhadd.u8 q5, q5, q3 474cabdff1aSopenharmony_ci sub r0, r0, r2, lsl #3 475cabdff1aSopenharmony_ci .endif 476cabdff1aSopenharmony_ci 477cabdff1aSopenharmony_ci vst1.8 {d12}, [r0,:64], r2 478cabdff1aSopenharmony_ci vst1.8 {d13}, [r0,:64], r2 479cabdff1aSopenharmony_ci vst1.8 {d14}, [r0,:64], r2 480cabdff1aSopenharmony_ci vst1.8 {d15}, [r0,:64], r2 481cabdff1aSopenharmony_ci vst1.8 {d8}, [r0,:64], r2 482cabdff1aSopenharmony_ci vst1.8 {d9}, [r0,:64], r2 483cabdff1aSopenharmony_ci vst1.8 {d10}, [r0,:64], r2 484cabdff1aSopenharmony_ci vst1.8 {d11}, [r0,:64], r2 485cabdff1aSopenharmony_ci 486cabdff1aSopenharmony_ci mov lr, r10 487cabdff1aSopenharmony_ci bx lr 488cabdff1aSopenharmony_ciendfunc 489cabdff1aSopenharmony_ci.endm 490cabdff1aSopenharmony_ci 491cabdff1aSopenharmony_ci h264_qpel8_hv_lowpass put 492cabdff1aSopenharmony_ci h264_qpel8_hv_lowpass avg 493cabdff1aSopenharmony_ci 494cabdff1aSopenharmony_ci.macro h264_qpel8_hv_lowpass_l2 type 495cabdff1aSopenharmony_cifunction \type\()_h264_qpel8_hv_lowpass_l2_neon 496cabdff1aSopenharmony_ci mov r10, lr 497cabdff1aSopenharmony_ci bl put_h264_qpel8_hv_lowpass_neon_top 498cabdff1aSopenharmony_ci 499cabdff1aSopenharmony_ci vld1.8 {d0, d1}, [r2,:128]! 500cabdff1aSopenharmony_ci vld1.8 {d2, d3}, [r2,:128]! 501cabdff1aSopenharmony_ci vrhadd.u8 q0, q0, q6 502cabdff1aSopenharmony_ci vld1.8 {d4, d5}, [r2,:128]! 503cabdff1aSopenharmony_ci vrhadd.u8 q1, q1, q7 504cabdff1aSopenharmony_ci vld1.8 {d6, d7}, [r2,:128]! 
505cabdff1aSopenharmony_ci vrhadd.u8 q2, q2, q4 506cabdff1aSopenharmony_ci vrhadd.u8 q3, q3, q5 507cabdff1aSopenharmony_ci .ifc \type,avg 508cabdff1aSopenharmony_ci vld1.8 {d16}, [r0,:64], r3 509cabdff1aSopenharmony_ci vld1.8 {d17}, [r0,:64], r3 510cabdff1aSopenharmony_ci vld1.8 {d18}, [r0,:64], r3 511cabdff1aSopenharmony_ci vld1.8 {d19}, [r0,:64], r3 512cabdff1aSopenharmony_ci vrhadd.u8 q0, q0, q8 513cabdff1aSopenharmony_ci vld1.8 {d20}, [r0,:64], r3 514cabdff1aSopenharmony_ci vld1.8 {d21}, [r0,:64], r3 515cabdff1aSopenharmony_ci vrhadd.u8 q1, q1, q9 516cabdff1aSopenharmony_ci vld1.8 {d22}, [r0,:64], r3 517cabdff1aSopenharmony_ci vld1.8 {d23}, [r0,:64], r3 518cabdff1aSopenharmony_ci vrhadd.u8 q2, q2, q10 519cabdff1aSopenharmony_ci vrhadd.u8 q3, q3, q11 520cabdff1aSopenharmony_ci sub r0, r0, r3, lsl #3 521cabdff1aSopenharmony_ci .endif 522cabdff1aSopenharmony_ci vst1.8 {d0}, [r0,:64], r3 523cabdff1aSopenharmony_ci vst1.8 {d1}, [r0,:64], r3 524cabdff1aSopenharmony_ci vst1.8 {d2}, [r0,:64], r3 525cabdff1aSopenharmony_ci vst1.8 {d3}, [r0,:64], r3 526cabdff1aSopenharmony_ci vst1.8 {d4}, [r0,:64], r3 527cabdff1aSopenharmony_ci vst1.8 {d5}, [r0,:64], r3 528cabdff1aSopenharmony_ci vst1.8 {d6}, [r0,:64], r3 529cabdff1aSopenharmony_ci vst1.8 {d7}, [r0,:64], r3 530cabdff1aSopenharmony_ci 531cabdff1aSopenharmony_ci mov lr, r10 532cabdff1aSopenharmony_ci bx lr 533cabdff1aSopenharmony_ciendfunc 534cabdff1aSopenharmony_ci.endm 535cabdff1aSopenharmony_ci 536cabdff1aSopenharmony_ci h264_qpel8_hv_lowpass_l2 put 537cabdff1aSopenharmony_ci h264_qpel8_hv_lowpass_l2 avg 538cabdff1aSopenharmony_ci 539cabdff1aSopenharmony_ci.macro h264_qpel16_hv type 540cabdff1aSopenharmony_cifunction \type\()_h264_qpel16_hv_lowpass_neon 541cabdff1aSopenharmony_ci mov r9, lr 542cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_hv_lowpass_neon 543cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 544cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_hv_lowpass_neon 545cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #4 
546cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 547cabdff1aSopenharmony_ci add r1, r1, #8 548cabdff1aSopenharmony_ci sub r0, r0, r2, lsl #4 549cabdff1aSopenharmony_ci add r0, r0, #8 550cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_hv_lowpass_neon 551cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 552cabdff1aSopenharmony_ci mov lr, r9 553cabdff1aSopenharmony_ci b \type\()_h264_qpel8_hv_lowpass_neon 554cabdff1aSopenharmony_ciendfunc 555cabdff1aSopenharmony_ci 556cabdff1aSopenharmony_cifunction \type\()_h264_qpel16_hv_lowpass_l2_neon 557cabdff1aSopenharmony_ci mov r9, lr 558cabdff1aSopenharmony_ci sub r2, r4, #256 559cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_hv_lowpass_l2_neon 560cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 561cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_hv_lowpass_l2_neon 562cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #4 563cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 564cabdff1aSopenharmony_ci add r1, r1, #8 565cabdff1aSopenharmony_ci sub r0, r0, r3, lsl #4 566cabdff1aSopenharmony_ci add r0, r0, #8 567cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_hv_lowpass_l2_neon 568cabdff1aSopenharmony_ci sub r1, r1, r3, lsl #2 569cabdff1aSopenharmony_ci mov lr, r9 570cabdff1aSopenharmony_ci b \type\()_h264_qpel8_hv_lowpass_l2_neon 571cabdff1aSopenharmony_ciendfunc 572cabdff1aSopenharmony_ci.endm 573cabdff1aSopenharmony_ci 574cabdff1aSopenharmony_ci h264_qpel16_hv put 575cabdff1aSopenharmony_ci h264_qpel16_hv avg 576cabdff1aSopenharmony_ci 577cabdff1aSopenharmony_ci.macro h264_qpel8 type 578cabdff1aSopenharmony_cifunction ff_\type\()_h264_qpel8_mc10_neon, export=1 579cabdff1aSopenharmony_ci lowpass_const r3 580cabdff1aSopenharmony_ci mov r3, r1 581cabdff1aSopenharmony_ci sub r1, r1, #2 582cabdff1aSopenharmony_ci mov r12, #8 583cabdff1aSopenharmony_ci b \type\()_h264_qpel8_h_lowpass_l2_neon 584cabdff1aSopenharmony_ciendfunc 585cabdff1aSopenharmony_ci 586cabdff1aSopenharmony_cifunction ff_\type\()_h264_qpel8_mc20_neon, export=1 
587cabdff1aSopenharmony_ci lowpass_const r3 588cabdff1aSopenharmony_ci sub r1, r1, #2 589cabdff1aSopenharmony_ci mov r3, r2 590cabdff1aSopenharmony_ci mov r12, #8 591cabdff1aSopenharmony_ci b \type\()_h264_qpel8_h_lowpass_neon 592cabdff1aSopenharmony_ciendfunc 593cabdff1aSopenharmony_ci 594cabdff1aSopenharmony_cifunction ff_\type\()_h264_qpel8_mc30_neon, export=1 595cabdff1aSopenharmony_ci lowpass_const r3 596cabdff1aSopenharmony_ci add r3, r1, #1 597cabdff1aSopenharmony_ci sub r1, r1, #2 598cabdff1aSopenharmony_ci mov r12, #8 599cabdff1aSopenharmony_ci b \type\()_h264_qpel8_h_lowpass_l2_neon 600cabdff1aSopenharmony_ciendfunc 601cabdff1aSopenharmony_ci 602cabdff1aSopenharmony_cifunction ff_\type\()_h264_qpel8_mc01_neon, export=1 603cabdff1aSopenharmony_ci push {lr} 604cabdff1aSopenharmony_ci mov r12, r1 605cabdff1aSopenharmony_ci\type\()_h264_qpel8_mc01: 606cabdff1aSopenharmony_ci lowpass_const r3 607cabdff1aSopenharmony_ci mov r3, r2 608cabdff1aSopenharmony_ci sub r1, r1, r2, lsl #1 609cabdff1aSopenharmony_ci vpush {d8-d15} 610cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_l2_neon 611cabdff1aSopenharmony_ci vpop {d8-d15} 612cabdff1aSopenharmony_ci pop {pc} 613cabdff1aSopenharmony_ciendfunc 614cabdff1aSopenharmony_ci 615cabdff1aSopenharmony_cifunction ff_\type\()_h264_qpel8_mc11_neon, export=1 616cabdff1aSopenharmony_ci push {r0, r1, r11, lr} 617cabdff1aSopenharmony_ci\type\()_h264_qpel8_mc11: 618cabdff1aSopenharmony_ci lowpass_const r3 619cabdff1aSopenharmony_ci mov r11, sp 620cabdff1aSopenharmony_ciA bic sp, sp, #15 621cabdff1aSopenharmony_ciT bic r0, r11, #15 622cabdff1aSopenharmony_ciT mov sp, r0 623cabdff1aSopenharmony_ci sub sp, sp, #64 624cabdff1aSopenharmony_ci mov r0, sp 625cabdff1aSopenharmony_ci sub r1, r1, #2 626cabdff1aSopenharmony_ci mov r3, #8 627cabdff1aSopenharmony_ci mov r12, #8 628cabdff1aSopenharmony_ci vpush {d8-d15} 629cabdff1aSopenharmony_ci bl put_h264_qpel8_h_lowpass_neon 630cabdff1aSopenharmony_ci ldrd r0, r1, [r11], #8 
631cabdff1aSopenharmony_ci mov r3, r2 632cabdff1aSopenharmony_ci add r12, sp, #64 633cabdff1aSopenharmony_ci sub r1, r1, r2, lsl #1 634cabdff1aSopenharmony_ci mov r2, #8 635cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_v_lowpass_l2_neon 636cabdff1aSopenharmony_ci vpop {d8-d15} 637cabdff1aSopenharmony_ci mov sp, r11 638cabdff1aSopenharmony_ci pop {r11, pc} 639cabdff1aSopenharmony_ciendfunc 640cabdff1aSopenharmony_ci 641cabdff1aSopenharmony_cifunction ff_\type\()_h264_qpel8_mc21_neon, export=1 642cabdff1aSopenharmony_ci push {r0, r1, r4, r10, r11, lr} 643cabdff1aSopenharmony_ci\type\()_h264_qpel8_mc21: 644cabdff1aSopenharmony_ci lowpass_const r3 645cabdff1aSopenharmony_ci mov r11, sp 646cabdff1aSopenharmony_ciA bic sp, sp, #15 647cabdff1aSopenharmony_ciT bic r0, r11, #15 648cabdff1aSopenharmony_ciT mov sp, r0 649cabdff1aSopenharmony_ci sub sp, sp, #(8*8+16*12) 650cabdff1aSopenharmony_ci sub r1, r1, #2 651cabdff1aSopenharmony_ci mov r3, #8 652cabdff1aSopenharmony_ci mov r0, sp 653cabdff1aSopenharmony_ci mov r12, #8 654cabdff1aSopenharmony_ci vpush {d8-d15} 655cabdff1aSopenharmony_ci bl put_h264_qpel8_h_lowpass_neon 656cabdff1aSopenharmony_ci mov r4, r0 657cabdff1aSopenharmony_ci ldrd r0, r1, [r11], #8 658cabdff1aSopenharmony_ci sub r1, r1, r2, lsl #1 659cabdff1aSopenharmony_ci sub r1, r1, #2 660cabdff1aSopenharmony_ci mov r3, r2 661cabdff1aSopenharmony_ci sub r2, r4, #64 662cabdff1aSopenharmony_ci bl \type\()_h264_qpel8_hv_lowpass_l2_neon 663cabdff1aSopenharmony_ci vpop {d8-d15} 664cabdff1aSopenharmony_ci mov sp, r11 665cabdff1aSopenharmony_ci pop {r4, r10, r11, pc} 666cabdff1aSopenharmony_ciendfunc 667cabdff1aSopenharmony_ci 668cabdff1aSopenharmony_cifunction ff_\type\()_h264_qpel8_mc31_neon, export=1 669cabdff1aSopenharmony_ci add r1, r1, #1 670cabdff1aSopenharmony_ci push {r0, r1, r11, lr} 671cabdff1aSopenharmony_ci sub r1, r1, #1 672cabdff1aSopenharmony_ci b \type\()_h264_qpel8_mc11 673cabdff1aSopenharmony_ciendfunc 674cabdff1aSopenharmony_ci 
/*
 * Notes shared by the 8x8 entry points that follow.
 *
 *  - lowpass_const loads the 16-bit filter taps 5 and 20 into d6[0] and
 *    d6[1], using its argument register as scratch.  r3 is therefore only
 *    given its real value after the macro has run.
 *  - vpush/vpop {d8-d15} brackets every bl: d8-d15 are callee-saved NEON
 *    registers under the AAPCS.
 *  - Lines prefixed A or T are alternatives selected by the instruction
 *    set being assembled (the prefixes are defined outside this file);
 *    both forms round sp down to a 16-byte boundary.
 *  - Entry points that branch to an inner label of another entry point
 *    first perform exactly the push that the target performs before that
 *    label, so the shared epilogue pops a matching frame.
 *  - NOTE(review): r0, r1 and r2 look like dst, src and line stride on
 *    entry; what r3, r4 and r12 mean is defined by the callee of each
 *    entry point.  Confirm against the C prototype and the callees.
 */

/* mc02: r1 moved back by 2 * r2, r3 = r2, then the vertical low-pass
 * routine is called. */
function ff_\type\()_h264_qpel8_mc02_neon, export=1
        push            {lr}
        lowpass_const   r3
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        mov             r3,  r2
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {pc}
endfunc

/*
 * mc12: two passes through an 8*8 + 16*12 byte stack scratch area.
 *   pass 1: put_h264_qpel8_v_lowpass_neon with r0 = scratch, r2 = 8,
 *           r3 = entry r2 and r1 moved back by 2 * r2;
 *   pass 2: the hv l2 routine with the saved r0/r1 reloaded, r1 moved
 *           back by 2 * r3 + 2 and r2 = r4 - 64, where r4 is r0 as pass 1
 *           left it.
 * NOTE(review): pass 2 relies on r3 still holding the entry r2 after
 * pass 1, and r4 - 64 presumably points back at the 8x8 block pass 1
 * wrote; r10 is saved but not touched here.  Confirm against the callees.
 */
function ff_\type\()_h264_qpel8_mc12_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
\type\()_h264_qpel8_mc12:
        lowpass_const   r3
        mov             r11, sp                 @ r11 points at the saved r0, r1
A       bic             sp,  sp,  #15           @ round sp down to 16 bytes
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(8*8+16*12)
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        mov             r3,  r2                 @ keep entry r2, r2 is overwritten next
        mov             r2,  #8
        mov             r0,  sp
        vpush           {d8-d15}
        bl              put_h264_qpel8_v_lowpass_neon
        mov             r4,  r0                 @ r0 as left by pass 1
        ldrd            r0,  r1,  [r11], #8     @ reload saved r0, r1 and step r11 past them
        sub             r1,  r1,  r3, lsl #1    @ r1 -= 2 * r3
        sub             r1,  r1,  #2
        sub             r2,  r4,  #64
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

/*
 * mc22: single call to the hv low-pass routine with r1 moved back by
 * 2 * r2 + 2, r3 = r2 and r4 = a 16*12-byte, 16-byte-aligned stack block.
 * NOTE(review): unlike the other entry points here, lowpass_const is not
 * invoked; presumably the hv routine loads the taps itself.  Confirm.
 */
function ff_\type\()_h264_qpel8_mc22_neon, export=1
        push            {r4, r10, r11, lr}
        mov             r11, sp
A       bic             sp,  sp,  #15           @ round sp down to 16 bytes
T       bic             r4,  r11, #15
T       mov             sp,  r4
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        sub             r1,  r1,  #2
        mov             r3,  r2
        sub             sp,  sp,  #(16*12)
        mov             r4,  sp
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

/* mc32: joins mc12 with the entry r1 stored for pass 2 and r1 + 1 live for
 * pass 1. */
function ff_\type\()_h264_qpel8_mc32_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
        add             r1,  r1,  #1
        b               \type\()_h264_qpel8_mc12
endfunc

/* mc03: joins mc01 with r12 = r1 + r2 instead of r12 = r1. */
function ff_\type\()_h264_qpel8_mc03_neon, export=1
        push            {lr}
        add             r12, r1,  r2
        b               \type\()_h264_qpel8_mc01
endfunc

/* mc13: joins mc11 with the entry r1 stored for pass 2 and r1 + r2 live
 * for pass 1. */
function ff_\type\()_h264_qpel8_mc13_neon, export=1
        push            {r0, r1, r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel8_mc11
endfunc

/* mc23: joins mc21 with the entry r1 stored for pass 2 and r1 + r2 live
 * for pass 1. */
function ff_\type\()_h264_qpel8_mc23_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel8_mc21
endfunc

/* mc33: joins mc11 with r1 + 1 stored for pass 2 and r1 + r2 live for
 * pass 1 (the +1 is undone after the push). */
function ff_\type\()_h264_qpel8_mc33_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r11, lr}       @ saved r1 = entry r1 + 1
        add             r1,  r1,  r2
        sub             r1,  r1,  #1
        b               \type\()_h264_qpel8_mc11
endfunc
.endm

        h264_qpel8 put
        h264_qpel8 avg

/*
 * h264_qpel16 type
 *
 * Emits the exported 16x16 entry points ff_<type>_h264_qpel16_mcXY_neon
 * for the mc10 .. mc33 cases listed below; it is instantiated right after
 * the definition with type = put and type = avg.  Each entry point only
 * prepares registers and stack scratch space and then branches to, or
 * calls, one or two low-pass routines defined elsewhere in the file.
 *
 *  - lowpass_const loads the 16-bit filter taps 5 and 20 into d6[0] and
 *    d6[1], using its argument register as scratch.  r3 is therefore only
 *    given its real value after the macro has run.
 *  - vpush/vpop {d8-d15} brackets every bl: d8-d15 are callee-saved NEON
 *    registers under the AAPCS.
 *  - Lines prefixed A or T are alternatives selected by the instruction
 *    set being assembled (the prefixes are defined outside this file);
 *    both forms round sp down to a 16-byte boundary.
 *  - Entry points that branch to an inner label of another entry point
 *    first perform exactly the push that the target performs before that
 *    label, so the shared epilogue pops a matching frame.
 *  - NOTE(review): r0, r1 and r2 look like dst, src and line stride on
 *    entry; what r3, r4 and r12 mean is defined by the callee, and the
 *    extra saved registers (r4, r5, r9, r10) that are not touched here
 *    are presumably saved on behalf of the callees.  Confirm against the
 *    C prototype and the low-pass routines.
 */
.macro  h264_qpel16 type
/* mc10: r3 = unmodified r1, r1 moved back by 2, tail-branch to the
 * horizontal l2 routine. */
function ff_\type\()_h264_qpel16_mc10_neon, export=1
        lowpass_const   r3
        mov             r3,  r1
        sub             r1,  r1,  #2
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc

/* mc20: r1 moved back by 2, r3 = r2, tail-branch to the horizontal
 * routine. */
function ff_\type\()_h264_qpel16_mc20_neon, export=1
        lowpass_const   r3
        sub             r1,  r1,  #2
        mov             r3,  r2
        b               \type\()_h264_qpel16_h_lowpass_neon
endfunc

/* mc30: as mc10, but with r3 = r1 + 1. */
function ff_\type\()_h264_qpel16_mc30_neon, export=1
        lowpass_const   r3
        add             r3,  r1,  #1
        sub             r1,  r1,  #2
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc

/* mc01: r12 = unmodified r1, r3 = r2, r1 moved back by 2 * r2, then the
 * vertical l2 routine is called.  The inner label is the join point for
 * mc03, which does the push and sets up its own r12 first. */
function ff_\type\()_h264_qpel16_mc01_neon, export=1
        push            {r4, lr}
        mov             r12, r1
\type\()_h264_qpel16_mc01:
        lowpass_const   r3
        mov             r3,  r2
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        vpush           {d8-d15}
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/*
 * mc11: two passes through a 256-byte stack scratch block.
 *   pass 1: put_h264_qpel16_h_lowpass_neon with r0 = scratch, r3 = 16 and
 *           r1 moved back by 2;
 *   pass 2: the vertical l2 routine with the saved r0/r1 reloaded,
 *           r1 moved back by 2 * r2, r3 = r2, r2 = 16 and r12 = scratch.
 * mc31, mc13 and mc33 join at the inner label with a different r1 stored
 * for pass 2 than the one they leave live for pass 1.
 */
function ff_\type\()_h264_qpel16_mc11_neon, export=1
        push            {r0, r1, r4, r11, lr}
\type\()_h264_qpel16_mc11:
        lowpass_const   r3
        mov             r11, sp                 @ r11 points at the saved r0, r1
A       bic             sp,  sp,  #15           @ round sp down to 16 bytes
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #256          @ scratch block
        mov             r0,  sp
        sub             r1,  r1,  #2
        mov             r3,  #16
        vpush           {d8-d15}                @ 64 bytes, scratch is now at sp + 64
        bl              put_h264_qpel16_h_lowpass_neon
        ldrd            r0,  r1,  [r11], #8     @ reload saved r0, r1 and step r11 past them
        mov             r3,  r2
        add             r12, sp,  #64           @ r12 = scratch block
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        mov             r2,  #16
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11                @ drop scratch and the saved r0, r1
        pop             {r4, r11, pc}
endfunc

/*
 * mc21: pass 1 calls the packed put horizontal routine with r0 = a
 * 16*16 + 16*12 byte stack scratch area and r1 moved back by 2; r4 keeps
 * r0 as that call left it.  Pass 2 calls the hv l2 routine with the saved
 * r0/r1 reloaded, r1 moved back by 2 * r2 + 2 and r3 = r2.
 * NOTE(review): how the hv l2 routine uses r4 and the scratch area is not
 * visible here.  Confirm against the callee.
 */
function ff_\type\()_h264_qpel16_mc21_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
\type\()_h264_qpel16_mc21:
        lowpass_const   r3
        mov             r11, sp                 @ r11 points at the saved r0, r1
A       bic             sp,  sp,  #15           @ round sp down to 16 bytes
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(16*16+16*12)
        sub             r1,  r1,  #2
        mov             r0,  sp
        vpush           {d8-d15}
        bl              put_h264_qpel16_h_lowpass_neon_packed
        mov             r4,  r0                 @ r0 as left by pass 1
        ldrd            r0,  r1,  [r11], #8     @ reload saved r0, r1 and step r11 past them
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        sub             r1,  r1,  #2
        mov             r3,  r2
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4-r5, r9-r11, pc}
endfunc

/* mc31: joins mc11 with r1 + 1 stored for pass 2, while the live r1 is put
 * back to its entry value for pass 1. */
function ff_\type\()_h264_qpel16_mc31_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r4, r11, lr}   @ saved r1 = entry r1 + 1
        sub             r1,  r1,  #1
        b               \type\()_h264_qpel16_mc11
endfunc

/* mc02: r1 moved back by 2 * r2, r3 = r2, then the vertical low-pass
 * routine is called. */
function ff_\type\()_h264_qpel16_mc02_neon, export=1
        push            {r4, lr}
        lowpass_const   r3
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        mov             r3,  r2
        vpush           {d8-d15}
        bl              \type\()_h264_qpel16_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/*
 * mc12: pass 1 calls the packed put vertical routine with r0 = a
 * 16*16 + 16*12 byte stack scratch area, r3 = r2 and r1 moved back by
 * 2 * r2; r4 keeps r0 as that call left it.  Pass 2 calls the hv l2
 * routine with the saved r0/r1 reloaded, r1 moved back by 2 * r3 + 2 and
 * r2 copied back from r3.
 * NOTE(review): pass 2 relies on r3 still holding the entry r2 after
 * pass 1; the copy back into r2 suggests pass 1 may change r2.  Confirm
 * against the callee.
 */
function ff_\type\()_h264_qpel16_mc12_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
\type\()_h264_qpel16_mc12:
        lowpass_const   r3
        mov             r11, sp                 @ r11 points at the saved r0, r1
A       bic             sp,  sp,  #15           @ round sp down to 16 bytes
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(16*16+16*12)
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        mov             r0,  sp
        mov             r3,  r2
        vpush           {d8-d15}
        bl              put_h264_qpel16_v_lowpass_neon_packed
        mov             r4,  r0                 @ r0 as left by pass 1
        ldrd            r0,  r1,  [r11], #8     @ reload saved r0, r1 and step r11 past them
        sub             r1,  r1,  r3, lsl #1    @ r1 -= 2 * r3
        sub             r1,  r1,  #2
        mov             r2,  r3
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4-r5, r9-r11, pc}
endfunc

/* mc22: single call to the hv low-pass routine with r1 moved back by
 * 2 * r2 + 2, r3 = r2 and r4 = a 16*12-byte, 16-byte-aligned stack
 * block. */
function ff_\type\()_h264_qpel16_mc22_neon, export=1
        push            {r4, r9-r11, lr}
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15           @ round sp down to 16 bytes
T       bic             r4,  r11, #15
T       mov             sp,  r4
        sub             r1,  r1,  r2, lsl #1    @ r1 -= 2 * r2
        sub             r1,  r1,  #2
        mov             r3,  r2
        sub             sp,  sp,  #(16*12)
        mov             r4,  sp
        vpush           {d8-d15}
        bl              \type\()_h264_qpel16_hv_lowpass_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r9-r11, pc}
endfunc

/* mc32: joins mc12 with the entry r1 stored for pass 2 and r1 + 1 live for
 * pass 1. */
function ff_\type\()_h264_qpel16_mc32_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
        add             r1,  r1,  #1
        b               \type\()_h264_qpel16_mc12
endfunc

/* mc03: joins mc01 with r12 = r1 + r2 instead of r12 = r1. */
function ff_\type\()_h264_qpel16_mc03_neon, export=1
        push            {r4, lr}
        add             r12, r1,  r2
        b               \type\()_h264_qpel16_mc01
endfunc

/* mc13: joins mc11 with the entry r1 stored for pass 2 and r1 + r2 live
 * for pass 1. */
function ff_\type\()_h264_qpel16_mc13_neon, export=1
        push            {r0, r1, r4, r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel16_mc11
endfunc

/* mc23: joins mc21 with the entry r1 stored for pass 2 and r1 + r2 live
 * for pass 1. */
function ff_\type\()_h264_qpel16_mc23_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel16_mc21
endfunc

/* mc33: joins mc11 with r1 + 1 stored for pass 2 and r1 + r2 live for
 * pass 1 (the +1 is undone after the push). */
function ff_\type\()_h264_qpel16_mc33_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r4, r11, lr}   @ saved r1 = entry r1 + 1
        add             r1,  r1,  r2
        sub             r1,  r1,  #1
        b               \type\()_h264_qpel16_mc11
endfunc
.endm

        h264_qpel16 put
        h264_qpel16 avg