/*
 * VP8 NEON optimisations
 *
 * Copyright (c) 2010 Rob Clark <rob@ti.com>
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"
#include "neon.S"

@ ff_vp8_luma_dc_wht_neon
@   In:   r0 = destination; one 16-bit result is written every 32 bytes,
@              16 results in total (presumably the DC slot of 16 consecutive
@              16-coefficient blocks -- confirm against the C caller)
@         r1 = 16 input coefficients (int16, 128-bit aligned); zeroed here
@   Does: two butterfly passes with a 4x4 transpose in between; 3 is added
@         before the second pass and the results are shifted right by 3.
@   Clobbers: r0, r1, r3, q0-q3, q8, q15
function ff_vp8_luma_dc_wht_neon, export=1
        vld1.16         {q0-q1},  [r1,:128]
        vmov.i16        q15, #0

        @ first pass; the zeroing stores are interleaved with the arithmetic
        vadd.i16        d4,  d0,  d3
        vadd.i16        d6,  d1,  d2
        vst1.16         {q15},    [r1,:128]!
        vsub.i16        d7,  d1,  d2
        vsub.i16        d5,  d0,  d3
        vst1.16         {q15},    [r1,:128]
        vadd.i16        q0,  q2,  q3
        vsub.i16        q1,  q2,  q3

        vmov.i16        q8,  #3

        @ 4x4 transpose of 16-bit elements
        vtrn.32         d0,  d2
        vtrn.32         d1,  d3
        vtrn.16         d0,  d1
        vtrn.16         d2,  d3

        vadd.i16        d0,  d0,  d16           @ rounding bias (+3) for the final >> 3

        @ second pass
        vadd.i16        d4,  d0,  d3
        vadd.i16        d6,  d1,  d2
        vsub.i16        d7,  d1,  d2
        vsub.i16        d5,  d0,  d3
        vadd.i16        q0,  q2,  q3
        vsub.i16        q1,  q2,  q3

        vshr.s16        q0,  q0,  #3
        vshr.s16        q1,  q1,  #3

        @ scatter the 16 results, one per 32-byte step
        mov             r3,  #32
        vst1.16         {d0[0]},  [r0,:16], r3
        vst1.16         {d1[0]},  [r0,:16], r3
        vst1.16         {d2[0]},  [r0,:16], r3
        vst1.16         {d3[0]},  [r0,:16], r3
        vst1.16         {d0[1]},  [r0,:16], r3
        vst1.16         {d1[1]},  [r0,:16], r3
        vst1.16         {d2[1]},  [r0,:16], r3
        vst1.16         {d3[1]},  [r0,:16], r3
        vst1.16         {d0[2]},  [r0,:16], r3
        vst1.16         {d1[2]},  [r0,:16], r3
        vst1.16         {d2[2]},  [r0,:16], r3
        vst1.16         {d3[2]},  [r0,:16], r3
        vst1.16         {d0[3]},  [r0,:16], r3
        vst1.16         {d1[3]},  [r0,:16], r3
        vst1.16         {d2[3]},  [r0,:16], r3
        vst1.16         {d3[3]},  [r0,:16], r3

        bx              lr
endfunc

@ ff_vp8_idct_add_neon
@   In:   r0 = destination pixels (4 rows of 4 bytes, 32-bit aligned)
@         r1 = 16 coefficients (int16, 128-bit aligned); zeroed here
@         r2 = destination stride in bytes
@   Does: two 1-D transform passes with transposes, rounding shift by 3,
@         adds the result to the destination pixels with saturation to u8.
@   Multipliers: d4[0] = 20091 is applied as x + ((x * 20091) >> 16);
@                d4[1] = 35468/2 is applied through vqdmulh, which doubles the
@                product, i.e. (x * 35468) >> 16.
@   Clobbers: r0 (left 4 rows past its entry value), r1, r3, q0-q2, q8-q13, q15
function ff_vp8_idct_add_neon, export=1
        vld1.16         {q0-q1},  [r1,:128]
        movw            r3,  #20091
        movt            r3,  #35468/2
        vdup.32         d4,  r3

        @ first pass
        vmull.s16       q12, d1,  d4[0]
        vmull.s16       q13, d3,  d4[0]
        vqdmulh.s16     d20, d1,  d4[1]
        vqdmulh.s16     d23, d3,  d4[1]
        vshrn.s32       d21, q12, #16
        vshrn.s32       d22, q13, #16
        vadd.s16        d21, d21, d1
        vadd.s16        d22, d22, d3

        vadd.s16        d16, d0,  d2
        vsub.s16        d17, d0,  d2
        vadd.s16        d18, d21, d23
        vsub.s16        d19, d20, d22
        vadd.s16        q0,  q8,  q9
        vsub.s16        q1,  q8,  q9

        vtrn.32         d0,  d3
        vtrn.32         d1,  d2
        vtrn.16         d0,  d1
        vtrn.16         d3,  d2

        @ second pass; coefficient zeroing and pixel loads are interleaved
        vmov.i16        q15, #0
        vmull.s16       q12, d1,  d4[0]
        vst1.16         {q15},    [r1,:128]!
        vmull.s16       q13, d2,  d4[0]
        vst1.16         {q15},    [r1,:128]
        vqdmulh.s16     d21, d1,  d4[1]
        vqdmulh.s16     d23, d2,  d4[1]
        vshrn.s32       d20, q12, #16
        vshrn.s32       d22, q13, #16
        vadd.i16        d20, d20, d1
        vadd.i16        d22, d22, d2

        vadd.i16        d16, d0,  d3
        vsub.i16        d17, d0,  d3
        vadd.i16        d18, d20, d23
        vld1.32         {d20[]},  [r0,:32], r2  @ row 0
        vsub.i16        d19, d21, d22
        vld1.32         {d22[]},  [r0,:32], r2  @ row 1
        vadd.s16        q0,  q8,  q9
        vld1.32         {d23[]},  [r0,:32], r2  @ row 2
        vsub.s16        q1,  q8,  q9
        vld1.32         {d21[]},  [r0,:32], r2  @ row 3
        vrshr.s16       q0,  q0,  #3
        vtrn.32         q10, q11                @ d20 = rows 0,1   d21 = rows 3,2
        vrshr.s16       q1,  q1,  #3

        sub             r0,  r0,  r2,  lsl #2   @ rewind dst by 4 rows

        vtrn.32         d0,  d3
        vtrn.32         d1,  d2
        vtrn.16         d0,  d1
        vtrn.16         d3,  d2

        vaddw.u8        q0,  q0,  d20
        vaddw.u8        q1,  q1,  d21
        vqmovun.s16     d0,  q0
        vqmovun.s16     d1,  q1

        @ d1 holds rows 3,2 (see the vtrn.32 above), hence the lane order
        vst1.32         {d0[0]},  [r0,:32], r2
        vst1.32         {d0[1]},  [r0,:32], r2
        vst1.32         {d1[1]},  [r0,:32], r2
        vst1.32         {d1[0]},  [r0,:32], r2

        bx              lr
endfunc

@ ff_vp8_idct_dc_add_neon
@   In:   r0 = destination pixels (4 rows of 4 bytes, 32-bit aligned)
@         r1 = coefficients; only the first int16 is read, then zeroed
@         r2 = destination stride in bytes
@   Does: dst[y][x] = sat_u8(dst[y][x] + ((dc + 4) >> 3)) for a 4x4 area.
@   Clobbers: r0 (left 4 rows past its entry value), r3, r12, q0-q3
function ff_vp8_idct_dc_add_neon, export=1
        mov             r3,  #0
        ldrsh           r12, [r1]
        strh            r3,  [r1]
        vdup.16         q1,  r12
        vrshr.s16       q1,  q1,  #3
        vld1.32         {d0[]},   [r0,:32], r2  @ row 0
        vld1.32         {d1[]},   [r0,:32], r2  @ row 1
        vld1.32         {d0[1]},  [r0,:32], r2  @ row 2
        vld1.32         {d1[1]},  [r0,:32], r2  @ row 3
        vaddw.u8        q2,  q1,  d0
        vaddw.u8        q3,  q1,  d1
        sub             r0,  r0,  r2,  lsl #2   @ rewind dst by 4 rows
        vqmovun.s16     d0,  q2
        vqmovun.s16     d1,  q3
        vst1.32         {d0[0]},  [r0,:32], r2
        vst1.32         {d1[0]},  [r0,:32], r2
        vst1.32         {d0[1]},  [r0,:32], r2
        vst1.32         {d1[1]},  [r0,:32], r2
        bx              lr
endfunc

@ ff_vp8_idct_dc_add4uv_neon
@   In:   r0 = destination pixels (8 rows of 8 bytes, 64-bit aligned)
@         r1 = coefficients; one int16 DC is read and zeroed at byte offsets
@              0, 32, 64 and 96
@         r2 = destination stride in bytes
@   Does: adds (dc + 4) >> 3 to an 8x8 area, one DC per 4x4 quadrant:
@         rows 0-3 use DC0 (left half) / DC1 (right half),
@         rows 4-7 use DC2 (left half) / DC3 (right half).
@   Clobbers: r0, r1, r3, q0-q3, q8-q13
function ff_vp8_idct_dc_add4uv_neon, export=1
        vmov.i16        d0,  #0
        mov             r3,  #32
        vld1.16         {d16[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d17[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d18[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d19[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        mov             r3,  r0                 @ r3 = store pointer, r0 = load pointer
        vrshr.s16       q8,  q8,  #3            @ dc >>= 3
        vld1.8          {d0},     [r0,:64], r2
        vrshr.s16       q9,  q9,  #3
        vld1.8          {d1},     [r0,:64], r2
        vaddw.u8        q10, q8,  d0
        vld1.8          {d2},     [r0,:64], r2
        vaddw.u8        q0,  q8,  d1
        vld1.8          {d3},     [r0,:64], r2
        vaddw.u8        q11, q8,  d2
        vld1.8          {d4},     [r0,:64], r2
        vaddw.u8        q1,  q8,  d3
        vld1.8          {d5},     [r0,:64], r2
        vaddw.u8        q12, q9,  d4
        vld1.8          {d6},     [r0,:64], r2
        vaddw.u8        q2,  q9,  d5
        vld1.8          {d7},     [r0,:64], r2
        vaddw.u8        q13, q9,  d6
        vqmovun.s16     d20, q10
        vaddw.u8        q3,  q9,  d7
        vqmovun.s16     d21, q0
        vqmovun.s16     d22, q11
        vst1.8          {d20},    [r3,:64], r2
        vqmovun.s16     d23, q1
        vst1.8          {d21},    [r3,:64], r2
        vqmovun.s16     d24, q12
        vst1.8          {d22},    [r3,:64], r2
        vqmovun.s16     d25, q2
        vst1.8          {d23},    [r3,:64], r2
        vqmovun.s16     d26, q13
        vst1.8          {d24},    [r3,:64], r2
        vqmovun.s16     d27, q3
        vst1.8          {d25},    [r3,:64], r2
        vst1.8          {d26},    [r3,:64], r2
        vst1.8          {d27},    [r3,:64], r2

        bx              lr
endfunc

@ ff_vp8_idct_dc_add4y_neon
@   In:   r0 = destination pixels (4 rows of 16 bytes, 128-bit aligned)
@         r1 = coefficients; one int16 DC is read and zeroed at byte offsets
@              0, 32, 64 and 96
@         r2 = destination stride in bytes
@   Does: adds (dc + 4) >> 3 to a 16x4 area; each group of 4 columns uses its
@         own DC (DC0..DC3 from left to right).
@   Clobbers: r0 (left 4 rows past its entry value), r1, r3, q0-q3, q8-q13
function ff_vp8_idct_dc_add4y_neon, export=1
        vmov.i16        d0,  #0
        mov             r3,  #32
        vld1.16         {d16[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d17[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d18[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d19[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vrshr.s16       q8,  q8,  #3            @ dc >>= 3
        vld1.8          {q0},     [r0,:128], r2
        vrshr.s16       q9,  q9,  #3
        vld1.8          {q1},     [r0,:128], r2
        vaddw.u8        q10, q8,  d0
        vld1.8          {q2},     [r0,:128], r2
        vaddw.u8        q0,  q9,  d1
        vld1.8          {q3},     [r0,:128], r2
        vaddw.u8        q11, q8,  d2
        vaddw.u8        q1,  q9,  d3
        vaddw.u8        q12, q8,  d4
        vaddw.u8        q2,  q9,  d5
        vaddw.u8        q13, q8,  d6
        vaddw.u8        q3,  q9,  d7
        sub             r0,  r0,  r2,  lsl #2   @ rewind dst by 4 rows
        vqmovun.s16     d20, q10
        vqmovun.s16     d21, q0
        vqmovun.s16     d22, q11
        vqmovun.s16     d23, q1
        vqmovun.s16     d24, q12
        vst1.8          {q10},    [r0,:128], r2
        vqmovun.s16     d25, q2
        vst1.8          {q11},    [r0,:128], r2
        vqmovun.s16     d26, q13
        vst1.8          {q12},    [r0,:128], r2
        vqmovun.s16     d27, q3
        vst1.8          {q13},    [r0,:128], r2

        bx              lr
endfunc

@ vp8_loop_filter: filter 16 pixel positions across one edge, in registers.
@
@ Register layout:
@   P3..Q3       -> q0..q7
@   flim_E       -> q14
@   flim_I       -> q15
@   hev_thresh   -> r12
@
@ Variants:
@   simple=1     only P1..Q1 (q2-q5) and flim_E are read; P0/Q0 are filtered
@   inner=1      P1, P0, Q0, Q1 are filtered
@   otherwise    P2..Q2 (q1-q6) are filtered (macroblock-edge filter)
@ q0 and q7 are never written.  q8-q15 are clobbered.
.macro  vp8_loop_filter, inner=0, simple=0
    .if \simple
        vabd.u8         q9,  q3,  q4            @ abs(P0-Q0)
        vabd.u8         q15, q2,  q5            @ abs(P1-Q1)
        vqadd.u8        q9,  q9,  q9            @ abs(P0-Q0) * 2
        vshr.u8         q10, q15, #1            @ abs(P1-Q1) / 2
        vqadd.u8        q11, q9,  q10           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
        vmov.i8         q13, #0x80
        vcle.u8         q8,  q11, q14           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim
    .else
        @ calculate hev and normal_limit:
        vabd.u8         q12, q2,  q3            @ abs(P1-P0)
        vabd.u8         q13, q5,  q4            @ abs(Q1-Q0)
        vabd.u8         q10, q0,  q1            @ abs(P3-P2)
        vabd.u8         q11, q1,  q2            @ abs(P2-P1)
        vcle.u8         q8,  q12, q15           @ abs(P1-P0) <= flim_I
        vcle.u8         q9,  q13, q15           @ abs(Q1-Q0) <= flim_I
        vcle.u8         q10, q10, q15           @ abs(P3-P2) <= flim_I
        vcle.u8         q11, q11, q15           @ abs(P2-P1) <= flim_I
        vand            q8,  q8,  q9
        vabd.u8         q9,  q7,  q6            @ abs(Q3-Q2)
        vand            q8,  q8,  q11
        vabd.u8         q11, q6,  q5            @ abs(Q2-Q1)
        vand            q8,  q8,  q10
        vcle.u8         q10, q9,  q15           @ abs(Q3-Q2) <= flim_I
        vcle.u8         q11, q11, q15           @ abs(Q2-Q1) <= flim_I
        vabd.u8         q9,  q3,  q4            @ abs(P0-Q0)
        vabd.u8         q15, q2,  q5            @ abs(P1-Q1)
        vand            q8,  q8,  q10
        vqadd.u8        q9,  q9,  q9            @ abs(P0-Q0) * 2
        vand            q8,  q8,  q11
        vshr.u8         q10, q15, #1            @ abs(P1-Q1) / 2
        vdup.8          q15, r12                @ hev_thresh
        vqadd.u8        q11, q9,  q10           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
        vcgt.u8         q12, q12, q15           @ abs(P1-P0) > hev_thresh
        vcle.u8         q11, q11, q14           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim_E
        vcgt.u8         q14, q13, q15           @ abs(Q1-Q0) > hev_thresh
        vand            q8,  q8,  q11
        vmov.i8         q13, #0x80
        vorr            q9,  q12, q14
    .endif

    @ at this point:
    @   q8: normal_limit
    @   q9: hev (not computed when simple=1)

    @ convert to signed value:
        veor            q3,  q3,  q13           @ PS0 = P0 ^ 0x80
        veor            q4,  q4,  q13           @ QS0 = Q0 ^ 0x80

        vmov.i16        q12, #3
        vsubl.s8        q10, d8,  d6            @ QS0 - PS0
        vsubl.s8        q11, d9,  d7            @   (widened to 16 bits)
        veor            q2,  q2,  q13           @ PS1 = P1 ^ 0x80
        veor            q5,  q5,  q13           @ QS1 = Q1 ^ 0x80
        vmul.i16        q10, q10, q12           @ w = 3 * (QS0 - PS0)
        vmul.i16        q11, q11, q12

        vqsub.s8        q12, q2,  q5            @ clamp(PS1-QS1)
        vmov.i8         q14, #4
        vmov.i8         q15, #3
    .if \inner
        vand            q12, q12, q9            @ if(hev) w += clamp(PS1-QS1)
    .endif
        vaddw.s8        q10, q10, d24           @ w += clamp(PS1-QS1)
        vaddw.s8        q11, q11, d25
        vqmovn.s16      d20, q10                @ narrow result back into q10
        vqmovn.s16      d21, q11
    .if !\inner && !\simple
        veor            q1,  q1,  q13           @ PS2 = P2 ^ 0x80
        veor            q6,  q6,  q13           @ QS2 = Q2 ^ 0x80
    .endif
        vand            q10, q10, q8            @ w &= normal_limit

    @ registers used at this point..
    @   q0 -> P3  (don't corrupt)
    @   q1-q6 -> PS2-QS2
    @   q7 -> Q3  (don't corrupt)
    @   q9 -> hev
    @   q10 -> w
    @   q13 -> #0x80
    @   q14 -> #4
    @   q15 -> #3
    @   q8, q11, q12 -> unused

    @ filter_common:   is4tap==1
    @   c1 = clamp(w + 4) >> 3;
    @   c2 = clamp(w + 3) >> 3;
    @   Q0 = s2u(QS0 - c1);
    @   P0 = s2u(PS0 + c2);

    .if \simple
        vqadd.s8        q11, q10, q14           @ c1 = clamp(w+4)
        vqadd.s8        q12, q10, q15           @ c2 = clamp(w+3)
        vshr.s8         q11, q11, #3            @ c1 >>= 3
        vshr.s8         q12, q12, #3            @ c2 >>= 3
        vqsub.s8        q4,  q4,  q11           @ QS0 = clamp(QS0-c1)
        vqadd.s8        q3,  q3,  q12           @ PS0 = clamp(PS0+c2)
        veor            q4,  q4,  q13           @ Q0 = QS0 ^ 0x80
        veor            q3,  q3,  q13           @ P0 = PS0 ^ 0x80
        veor            q5,  q5,  q13           @ Q1 = QS1 ^ 0x80
        veor            q2,  q2,  q13           @ P1 = PS1 ^ 0x80
    .elseif \inner
        @ the !is4tap case of filter_common, only used for inner blocks
        @   c3 = ((c1&~hev) + 1) >> 1;
        @   Q1 = s2u(QS1 - c3);
        @   P1 = s2u(PS1 + c3);
        vqadd.s8        q11, q10, q14           @ c1 = clamp(w+4)
        vqadd.s8        q12, q10, q15           @ c2 = clamp(w+3)
        vshr.s8         q11, q11, #3            @ c1 >>= 3
        vshr.s8         q12, q12, #3            @ c2 >>= 3
        vqsub.s8        q4,  q4,  q11           @ QS0 = clamp(QS0-c1)
        vqadd.s8        q3,  q3,  q12           @ PS0 = clamp(PS0+c2)
        vbic            q11, q11, q9            @ c1 & ~hev
        veor            q4,  q4,  q13           @ Q0 = QS0 ^ 0x80
        vrshr.s8        q11, q11, #1            @ c3 >>= 1
        veor            q3,  q3,  q13           @ P0 = PS0 ^ 0x80
        vqsub.s8        q5,  q5,  q11           @ QS1 = clamp(QS1-c3)
        vqadd.s8        q2,  q2,  q11           @ PS1 = clamp(PS1+c3)
        veor            q5,  q5,  q13           @ Q1 = QS1 ^ 0x80
        veor            q2,  q2,  q13           @ P1 = PS1 ^ 0x80
    .else
        vand            q12, q10, q9            @ w & hev
        vqadd.s8        q11, q12, q14           @ c1 = clamp((w&hev)+4)
        vqadd.s8        q12, q12, q15           @ c2 = clamp((w&hev)+3)
        vshr.s8         q11, q11, #3            @ c1 >>= 3
        vshr.s8         q12, q12, #3            @ c2 >>= 3
        vbic            q10, q10, q9            @ w &= ~hev
        vqsub.s8        q4,  q4,  q11           @ QS0 = clamp(QS0-c1)
        vqadd.s8        q3,  q3,  q12           @ PS0 = clamp(PS0+c2)

        @ filter_mbedge:
        @   a = clamp((27*w + 63) >> 7);
        @   Q0 = s2u(QS0 - a);
        @   P0 = s2u(PS0 + a);
        @   a = clamp((18*w + 63) >> 7);
        @   Q1 = s2u(QS1 - a);
        @   P1 = s2u(PS1 + a);
        @   a = clamp((9*w + 63) >> 7);
        @   Q2 = s2u(QS2 - a);
        @   P2 = s2u(PS2 + a);
        vmov.i16        q9,  #63
        vshll.s8        q14, d20, #3
        vshll.s8        q15, d21, #3
        vaddw.s8        q14, q14, d20
        vaddw.s8        q15, q15, d21
        vadd.s16        q8,  q9,  q14
        vadd.s16        q9,  q9,  q15           @  9*w + 63
        vadd.s16        q11, q8,  q14
        vadd.s16        q12, q9,  q15           @ 18*w + 63
        vadd.s16        q14, q11, q14
        vadd.s16        q15, q12, q15           @ 27*w + 63
        vqshrn.s16      d16, q8,  #7
        vqshrn.s16      d17, q9,  #7            @ clamp(( 9*w + 63)>>7)
        vqshrn.s16      d22, q11, #7
        vqshrn.s16      d23, q12, #7            @ clamp((18*w + 63)>>7)
        vqshrn.s16      d28, q14, #7
        vqshrn.s16      d29, q15, #7            @ clamp((27*w + 63)>>7)
        vqadd.s8        q1,  q1,  q8            @ PS2 = clamp(PS2+a)
        vqsub.s8        q6,  q6,  q8            @ QS2 = clamp(QS2-a)
        vqadd.s8        q2,  q2,  q11           @ PS1 = clamp(PS1+a)
        vqsub.s8        q5,  q5,  q11           @ QS1 = clamp(QS1-a)
        vqadd.s8        q3,  q3,  q14           @ PS0 = clamp(PS0+a)
        vqsub.s8        q4,  q4,  q14           @ QS0 = clamp(QS0-a)
        veor            q3,  q3,  q13           @ P0 = PS0 ^ 0x80
        veor            q4,  q4,  q13           @ Q0 = QS0 ^ 0x80
        veor            q2,  q2,  q13           @ P1 = PS1 ^ 0x80
        veor            q5,  q5,  q13           @ Q1 = QS1 ^ 0x80
        veor            q1,  q1,  q13           @ P2 = PS2 ^ 0x80
        veor            q6,  q6,  q13           @ Q2 = QS2 ^ 0x80
    .endif
.endm

@ vp8_v_loop_filter16: emits ff_vp8_v_loop_filter16<name>_neon, which filters
@ 16 columns across a horizontal edge (rows are loaded directly).
@   In:   r0 = dst, pointing at the Q0 row (128-bit aligned rows)
@         r1 = stride in bytes
@         r2 = flim_E
@         r3 = flim_I            (not read when simple=1)
@         [sp] = hev_thresh      (not read when simple=1)
@   q4-q7 are saved and restored around the body; because the vpush moves sp
@   down by 64 bytes, the first stack argument is read at [sp, #64].
.macro  vp8_v_loop_filter16 name, inner=0, simple=0
function ff_vp8_v_loop_filter16\name\()_neon, export=1
        vpush           {q4-q7}
        sub             r0,  r0,  r1,  lsl #1+!\simple  @ back 2 rows (simple) or 4 rows

        @ Load pixels:
    .if !\simple
        ldr             r12, [sp, #64]          @ hev_thresh
        vld1.8          {q0},     [r0,:128], r1 @ P3
        vld1.8          {q1},     [r0,:128], r1 @ P2
    .endif
        vld1.8          {q2},     [r0,:128], r1 @ P1
        vld1.8          {q3},     [r0,:128], r1 @ P0
        vld1.8          {q4},     [r0,:128], r1 @ Q0
        vld1.8          {q5},     [r0,:128], r1 @ Q1
    .if !\simple
        vld1.8          {q6},     [r0,:128], r1 @ Q2
        vld1.8          {q7},     [r0,:128]     @ Q3
        vdup.8          q15, r3                 @ flim_I
    .endif
        vdup.8          q14, r2                 @ flim_E

        vp8_loop_filter inner=\inner, simple=\simple

        @ rewind dst: 4 rows (back to P1) when simple, 6 rows (back to P2) otherwise
        sub             r0,  r0,  r1,  lsl #2
    .if !\simple
        sub             r0,  r0,  r1,  lsl #1

        @ Store pixels:
        vst1.8          {q1},     [r0,:128], r1 @ P2
    .endif
        vst1.8          {q2},     [r0,:128], r1 @ P1
        vst1.8          {q3},     [r0,:128], r1 @ P0
        vst1.8          {q4},     [r0,:128], r1 @ Q0
        vst1.8          {q5},     [r0,:128], r1 @ Q1
    .if !\simple
        vst1.8          {q6},     [r0,:128]     @ Q2
    .endif

        vpop            {q4-q7}
        bx              lr
endfunc
.endm

vp8_v_loop_filter16
vp8_v_loop_filter16 _inner,  inner=1
vp8_v_loop_filter16 _simple, simple=1

@ vp8_v_loop_filter8uv: emits ff_vp8_v_loop_filter8uv<name>_neon, which filters
@ two 8-pixel-wide planes across a horizontal edge in one pass; the r0 plane
@ occupies the low half of each q register and the r1 plane the high half.
@   In:   r0 = u, r1 = v, each pointing at the Q0 row (64-bit aligned rows)
@         r2 = stride in bytes
@         r3 = flim_E
@         [sp]     = flim_I
@         [sp, #4] = hev_thresh
@   q4-q7 are saved and restored around the body; because the vpush moves sp
@   down by 64 bytes, the stack arguments are read at [sp, #64] and [sp, #68].
@   vp8_loop_filter is expanded here but not defined in this macro.
.macro  vp8_v_loop_filter8uv name, inner=0
function ff_vp8_v_loop_filter8uv\name\()_neon, export=1
        vpush           {q4-q7}
        sub             r0,  r0,  r2,  lsl #2   @ back 4 rows, to P3
        sub             r1,  r1,  r2,  lsl #2
        ldr             r12, [sp, #64]          @ flim_I

        @ Load pixels:
        vld1.8          {d0},     [r0,:64], r2  @ P3
        vld1.8          {d1},     [r1,:64], r2  @ P3
        vld1.8          {d2},     [r0,:64], r2  @ P2
        vld1.8          {d3},     [r1,:64], r2  @ P2
        vld1.8          {d4},     [r0,:64], r2  @ P1
        vld1.8          {d5},     [r1,:64], r2  @ P1
        vld1.8          {d6},     [r0,:64], r2  @ P0
        vld1.8          {d7},     [r1,:64], r2  @ P0
        vld1.8          {d8},     [r0,:64], r2  @ Q0
        vld1.8          {d9},     [r1,:64], r2  @ Q0
        vld1.8          {d10},    [r0,:64], r2  @ Q1
        vld1.8          {d11},    [r1,:64], r2  @ Q1
        vld1.8          {d12},    [r0,:64], r2  @ Q2
        vld1.8          {d13},    [r1,:64], r2  @ Q2
        vld1.8          {d14},    [r0,:64]      @ Q3
        vld1.8          {d15},    [r1,:64]      @ Q3

        vdup.8          q14, r3                 @ flim_E
        vdup.8          q15, r12                @ flim_I
        ldr             r12, [sp, #68]          @ hev_thresh

        vp8_loop_filter inner=\inner

        @ back up to P2:  u,v -= stride * 6
        sub             r0,  r0,  r2,  lsl #2
        sub             r1,  r1,  r2,  lsl #2
        sub             r0,  r0,  r2,  lsl #1
        sub             r1,  r1,  r2,  lsl #1

        @ Store pixels:
        vst1.8          {d2},     [r0,:64], r2  @ P2
        vst1.8          {d3},     [r1,:64], r2  @ P2
        vst1.8          {d4},     [r0,:64], r2  @ P1
        vst1.8          {d5},     [r1,:64], r2  @ P1
        vst1.8          {d6},     [r0,:64], r2  @ P0
        vst1.8          {d7},     [r1,:64], r2  @ P0
        vst1.8          {d8},     [r0,:64], r2  @ Q0
        vst1.8          {d9},     [r1,:64], r2  @ Q0
        vst1.8          {d10},    [r0,:64], r2  @ Q1
        vst1.8          {d11},    [r1,:64], r2  @ Q1
        vst1.8          {d12},    [r0,:64]      @ Q2
        vst1.8          {d13},    [r1,:64]      @ Q2

        vpop            {q4-q7}
        bx              lr
endfunc
.endm

vp8_v_loop_filter8uv
vp8_v_loop_filter8uv _inner, inner=1

@ vp8_h_loop_filter16: emits ff_vp8_h_loop_filter16<name>_neon, which filters
@ 16 rows across a vertical edge.  Eight bytes per row are loaded starting
@ 4 bytes left of dst, transposed so that each q register holds one pixel
@ column, filtered, transposed back and stored.
@   In:   r0 = dst, r1 = stride in bytes, r2 = flim_E
@         r3 = flim_I            (not read when simple=1)
@         [sp] = hev_thresh      (not read when simple=1; read at [sp, #64]
@                                 because of the 64-byte vpush)
@   transpose_8x8 and vp8_loop_filter are not defined in this macro;
@   transpose_8x8 presumably comes from neon.S -- confirm.
.macro  vp8_h_loop_filter16 name, inner=0, simple=0
function ff_vp8_h_loop_filter16\name\()_neon, export=1
        vpush           {q4-q7}
        sub             r0,  r0,  #4
    .if !\simple
        ldr             r12, [sp, #64]          @ hev_thresh
    .endif

        @ Load pixels:
        vld1.8          {d0},     [r0], r1      @ load first 8-line src data
        vld1.8          {d2},     [r0], r1
        vld1.8          {d4},     [r0], r1
        vld1.8          {d6},     [r0], r1
        vld1.8          {d8},     [r0], r1
        vld1.8          {d10},    [r0], r1
        vld1.8          {d12},    [r0], r1
        vld1.8          {d14},    [r0], r1
        vld1.8          {d1},     [r0], r1      @ load second 8-line src data
        vld1.8          {d3},     [r0], r1
        vld1.8          {d5},     [r0], r1
        vld1.8          {d7},     [r0], r1
        vld1.8          {d9},     [r0], r1
        vld1.8          {d11},    [r0], r1
        vld1.8          {d13},    [r0], r1
        vld1.8          {d15},    [r0], r1

        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7

        vdup.8          q14, r2                 @ flim_E
    .if !\simple
        vdup.8          q15, r3                 @ flim_I
    .endif

        vp8_loop_filter inner=\inner, simple=\simple

        sub             r0,  r0,  r1,  lsl #4   @ backup 16 rows

        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7

        @ Store pixels:
        vst1.8          {d0},     [r0], r1
        vst1.8          {d2},     [r0], r1
        vst1.8          {d4},     [r0], r1
        vst1.8          {d6},     [r0], r1
        vst1.8          {d8},     [r0], r1
        vst1.8          {d10},    [r0], r1
        vst1.8          {d12},    [r0], r1
        vst1.8          {d14},    [r0], r1
        vst1.8          {d1},     [r0], r1
        vst1.8          {d3},     [r0], r1
        vst1.8          {d5},     [r0], r1
        vst1.8          {d7},     [r0], r1
        vst1.8          {d9},     [r0], r1
        vst1.8          {d11},    [r0], r1
        vst1.8          {d13},    [r0], r1
        vst1.8          {d15},    [r0]

        vpop            {q4-q7}
        bx              lr
endfunc
.endm

vp8_h_loop_filter16
vp8_h_loop_filter16 _inner,  inner=1
vp8_h_loop_filter16 _simple, simple=1

@ Horizontal loop filter over 8 rows of u and 8 rows of v in one call.
@ Emits ff_vp8_h_loop_filter8uv<name>_neon.
@   r0 = u pointer, r1 = v pointer, r2 = stride applied to both pointers
@   r3 = flim_E, stack arg 0 = flim_I, stack arg 1 = hev_thresh
@ q4-q7 are saved on entry and restored on exit, so the first stack
@ argument is found 64 bytes above sp inside the body.
@ NOTE(review): vp8_loop_filter and transpose_8x8 are defined outside this
@ section; vp8_loop_filter presumably reads hev_thresh from r12 - confirm.
.macro  vp8_h_loop_filter8uv name, inner=0
function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
        vpush           {q4-q7}
        sub             r0,  r0,  #4            @ each 8-byte row load starts 4 bytes before the pointer
        sub             r1,  r1,  #4
        ldr             r12, [sp, #64]          @ flim_I

        @ Load pixels:
        vld1.8          {d0},     [r0], r2      @ load u
        vld1.8          {d1},     [r1], r2      @ load v
        vld1.8          {d2},     [r0], r2
        vld1.8          {d3},     [r1], r2
        vld1.8          {d4},     [r0], r2
        vld1.8          {d5},     [r1], r2
        vld1.8          {d6},     [r0], r2
        vld1.8          {d7},     [r1], r2
        vld1.8          {d8},     [r0], r2
        vld1.8          {d9},     [r1], r2
        vld1.8          {d10},    [r0], r2
        vld1.8          {d11},    [r1], r2
        vld1.8          {d12},    [r0], r2
        vld1.8          {d13},    [r1], r2
        vld1.8          {d14},    [r0], r2
        vld1.8          {d15},    [r1], r2

        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7

        vdup.8          q14, r3                 @ flim_E
        vdup.8          q15, r12                @ flim_I
        ldr             r12, [sp, #68]          @ hev_thresh

        vp8_loop_filter inner=\inner

        sub             r0,  r0,  r2, lsl #3    @ backup u 8 rows
        sub             r1,  r1,  r2, lsl #3    @ backup v 8 rows

        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7

        @ Store pixels:
        vst1.8          {d0},     [r0], r2
        vst1.8          {d1},     [r1], r2
        vst1.8          {d2},     [r0], r2
        vst1.8          {d3},     [r1], r2
        vst1.8          {d4},     [r0], r2
        vst1.8          {d5},     [r1], r2
        vst1.8          {d6},     [r0], r2
        vst1.8          {d7},     [r1], r2
        vst1.8          {d8},     [r0], r2
        vst1.8          {d9},     [r1], r2
        vst1.8          {d10},    [r0], r2
        vst1.8          {d11},    [r1], r2
        vst1.8          {d12},    [r0], r2
        vst1.8          {d13},    [r1], r2
        vst1.8          {d14},    [r0]          @ last row of each plane: no pointer update
        vst1.8          {d15},    [r1]

        vpop            {q4-q7}
        bx              lr
endfunc
.endm

vp8_h_loop_filter8uv
vp8_h_loop_filter8uv _inner, inner=1

@ Copy a 16-byte wide block, four rows per loop iteration.
@   r0 = dst (stores carry a 128-bit alignment hint), r1 = dst stride
@   r2 = src (no alignment hint),                     r3 = src stride
@   stack arg 0 = h
@ The loop runs while the remaining count is positive after subtracting 4,
@ so at least four rows are always copied and h is effectively rounded up
@ to a multiple of 4.
function ff_put_vp8_pixels16_neon, export=1
        ldr             r12, [sp, #0]           @ h
1:
        subs            r12, r12, #4
        vld1.8          {q0},     [r2], r3
        vld1.8          {q1},     [r2], r3
        vld1.8          {q2},     [r2], r3
        vld1.8          {q3},     [r2], r3
        vst1.8          {q0},     [r0,:128], r1
        vst1.8          {q1},     [r0,:128], r1
        vst1.8          {q2},     [r0,:128], r1
        vst1.8          {q3},     [r0,:128], r1
        bgt             1b
        bx              lr
endfunc

@ Copy an 8-byte wide block, four rows per loop iteration.
@ Same register and stack layout as the 16-wide copy; stores carry a
@ 64-bit alignment hint instead.
function ff_put_vp8_pixels8_neon, export=1
        ldr             r12, [sp, #0]           @ h
1:
        subs            r12, r12, #4
        vld1.8          {d0},     [r2], r3
        vld1.8          {d1},     [r2], r3
        vld1.8          {d2},     [r2], r3
        vld1.8          {d3},     [r2], r3
        vst1.8          {d0},     [r0,:64], r1
        vst1.8          {d1},     [r0,:64], r1
        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        bgt             1b
        bx              lr
endfunc

/* 4/6-tap 8th-pel MC */

@ Filter coefficients for every macro below are expected in q0 as 16-bit
@ lanes: d0[0], d0[1], d0[2], d0[3], d1[0], d1[1] are taps 0..5.
@ Taps 1 and 4 are applied with multiply-subtract, the others with
@ multiply or multiply-accumulate.

@ 6-tap horizontal filter producing 8 output pixels.
@   d    = destination d register (may be the same register as a)
@   a, b = 16 consecutive source bytes (a = first 8, b = next 8)
@ Output pixel i uses source bytes i .. i+5. The two partial sums are
@ combined with a saturating signed add, then rounded, shifted right by 7
@ and narrowed with unsigned saturation.
@ Clobbers q8-q15.
.macro  vp8_epel8_h6    d,   a,   b
        vext.8          d27, \a,  \b,  #1
        vmovl.u8        q8,  \a
        vext.8          d28, \a,  \b,  #2
        vmovl.u8        q9,  d27
        vext.8          d29, \a,  \b,  #3
        vmovl.u8        q10, d28
        vext.8          d30, \a,  \b,  #4
        vmovl.u8        q11, d29
        vext.8          d31, \a,  \b,  #5
        vmovl.u8        q12, d30
        vmul.u16        q10, q10, d0[2]
        vmovl.u8        q13, d31
        vmul.u16        q11, q11, d0[3]
        vmls.u16        q10, q9,  d0[1]
        vmls.u16        q11, q12, d1[0]
        vmla.u16        q10, q8,  d0[0]
        vmla.u16        q11, q13, d1[1]
        vqadd.s16       q11, q10, q11
        vqrshrun.s16    \d,  q11, #7
.endm

@ 6-tap horizontal filter producing 16 output pixels.
@   d0, d1 = destination d registers (low and high 8 pixels)
@   s0, s1 = d registers holding source bytes 0-7 and 8-15 (tap 0 input)
@   s2     = accepted but not referenced in the body
@   q0, q1 = q registers holding the source bytes; shifted copies at
@            byte offsets 1..5 are extracted from this pair
@ Note the parameter named q0 shadows nothing: coefficient accesses use
@ the literal d0/d1 lanes, not the macro argument.
@ Clobbers q1, q2, q3 and q8-q15.
.macro  vp8_epel16_h6   d0,  d1,  s0,  s1,  s2,  q0,  q1
        vext.8          q14, \q0, \q1, #3
        vext.8          q15, \q0, \q1, #4
        vmovl.u8        q11, d28
        vmovl.u8        q14, d29
        vext.8          q3,  \q0, \q1, #2
        vmovl.u8        q12, d30
        vmovl.u8        q15, d31
        vext.8          q8,  \q0, \q1, #1
        vmovl.u8        q10, d6
        vmovl.u8        q3,  d7
        vext.8          q2,  \q0, \q1, #5
        vmovl.u8        q13, d4
        vmovl.u8        q2,  d5
        vmovl.u8        q9,  d16
        vmovl.u8        q8,  d17
        vmul.u16        q11, q11, d0[3]
        vmul.u16        q10, q10, d0[2]
        vmul.u16        q3,  q3,  d0[2]
        vmul.u16        q14, q14, d0[3]
        vmls.u16        q11, q12, d1[0]
        vmovl.u8        q12, \s0
        vmovl.u8        q1,  \s1
        vmls.u16        q10, q9,  d0[1]
        vmls.u16        q3,  q8,  d0[1]
        vmls.u16        q14, q15, d1[0]
        vmla.u16        q10, q12, d0[0]
        vmla.u16        q11, q13, d1[1]
        vmla.u16        q3,  q1,  d0[0]
        vmla.u16        q14, q2,  d1[1]
        vqadd.s16       q11, q10, q11
        vqadd.s16       q14, q3,  q14
        vqrshrun.s16    \d0, q11, #7
        vqrshrun.s16    \d1, q14, #7
.endm

@ 6-tap vertical filter producing two output rows of 8 pixels.
@   d0 = output row 0, computed from s0..s5
@   d1 = output row 1, computed from s1..s6
@   s0..s6 = seven consecutive input rows (d registers)
@ All inputs are widened before either output is written, so d0/d1 may
@ reuse source registers.
@ Clobbers q8-q15.
.macro  vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6
        vmovl.u8        q10, \s0
        vmovl.u8        q11, \s3
        vmovl.u8        q14, \s6
        vmovl.u8        q9,  \s1
        vmovl.u8        q12, \s4
        vmovl.u8        q8,  \s2
        vmovl.u8        q13, \s5
        vmul.u16        q10, q10, d0[0]
        vmul.u16        q15, q11, d0[3]
        vmul.u16        q11, q11, d0[2]
        vmul.u16        q14, q14, d1[1]
        vmls.u16        q10, q9,  d0[1]
        vmls.u16        q15, q12, d1[0]
        vmls.u16        q11, q8,  d0[1]
        vmls.u16        q14, q13, d1[0]
        vmla.u16        q10, q8,  d0[2]
        vmla.u16        q15, q13, d1[1]
        vmla.u16        q11, q9,  d0[0]
        vmla.u16        q14, q12, d0[3]
        vqadd.s16       q15, q10, q15
        vqadd.s16       q14, q11, q14
        vqrshrun.s16    \d0, q15, #7
        vqrshrun.s16    \d1, q14, #7
.endm

@ 4-tap horizontal filter producing 8 output pixels.
@   d    = destination d register (may be the same register as a)
@   a, b = source bytes (a = first 8, b = following bytes)
@ Output pixel i uses source bytes i .. i+3 with taps 1..4
@ (d0[1], d0[2], d0[3], d1[0]); taps 0 and 5 are not used.
@ Clobbers q9-q12 and d28-d30.
.macro  vp8_epel8_h4    d,   a,   b
        vext.8          d28, \a,  \b,  #1
        vmovl.u8        q9,  \a
        vext.8          d29, \a,  \b,  #2
        vmovl.u8        q10, d28
        vext.8          d30, \a,  \b,  #3
        vmovl.u8        q11, d29
        vmovl.u8        q12, d30
        vmul.u16        q10, q10, d0[2]
        vmul.u16        q11, q11, d0[3]
        vmls.u16        q10, q9,  d0[1]
        vmls.u16        q11, q12, d1[0]
        vqadd.s16       q11, q10, q11
        vqrshrun.s16    \d,  q11, #7
.endm

@ 4-tap vertical filter producing two output rows of 8 pixels.
@   d0 = output row 0, computed from s0..s3
@   d1 = output row 1, computed from s1..s4
@   s0..s4 = five consecutive input rows (d registers)
@ Uses taps 1..4 only. All inputs are widened before either output is
@ written, so d0/d1 may reuse source registers.
@ Clobbers q8-q15.
.macro  vp8_epel8_v4_y2 d0, d1, s0, s1, s2, s3, s4
        vmovl.u8        q9,  \s0
        vmovl.u8        q10, \s1
        vmovl.u8        q11, \s2
        vmovl.u8        q12, \s3
        vmovl.u8        q13, \s4
        vmul.u16        q8,  q10, d0[2]
        vmul.u16        q14, q11, d0[3]
        vmul.u16        q11, q11, d0[2]
        vmul.u16        q15, q12, d0[3]
        vmls.u16        q8,  q9,  d0[1]
        vmls.u16        q14, q12, d1[0]
        vmls.u16        q11, q10, d0[1]
        vmls.u16        q15, q13, d1[0]
        vqadd.s16       q8,  q8,  q14
        vqadd.s16       q11, q11, q15
        vqrshrun.s16    \d0, q8,  #7
        vqrshrun.s16    \d1, q11, #7
.endm

@ Register and stack layout shared by every ff_put_vp8_epel* function:
@   r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@   stack args in order: h, mx, my
@ The coefficient row is loaded from subpel_filters + (index - 1) * 16,
@ where index is mx for horizontal passes and my for vertical passes.
@ Loops count h down to exactly zero (bne), so h must be a positive
@ multiple of the rows produced per iteration.

@ 16 wide, 6-tap vertical. Two output rows per iteration.
@ Saves r4, lr and d8-d15 (72 bytes), so h/mx/my sit at sp+72/76/80.
function ff_put_vp8_epel16_v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ start two rows above src
        push            {r4,lr}
        vpush           {d8-d15}

        ldr             r4,  [sp, #80]          @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #72]          @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        vld1.8          {d2-d3},  [r2], r3
        vld1.8          {d4-d5},  [r2], r3
        vld1.8          {d6-d7},  [r2], r3
        vld1.8          {d8-d9},  [r2], r3
        vld1.8          {d10-d11}, [r2], r3
        vld1.8          {d12-d13}, [r2], r3
        vld1.8          {d14-d15}, [r2]
        sub             r2,  r2,  r3,  lsl #2   @ 6 rows forward, 4 back: net advance of 2 rows

        @ left 8 columns, then right 8 columns
        vp8_epel8_v6_y2 d2,  d4,  d2,  d4,  d6,  d8,  d10, d12, d14
        vp8_epel8_v6_y2 d3,  d5,  d3,  d5,  d7,  d9,  d11, d13, d15

        vst1.8          {d2-d3},  [r0,:128], r1
        vst1.8          {d4-d5},  [r0,:128], r1
        subs            r12, r12, #2
        bne             1b

        vpop            {d8-d15}
        pop             {r4,pc}
endfunc

@ 16 wide, 6-tap horizontal. One output row per iteration.
@ Each row reads 24 source bytes starting 2 bytes left of src.
function ff_put_vp8_epel16_h6_neon, export=1
        sub             r2,  r2,  #2
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        vld1.8          {d2-d4},  [r2], r3

        vp8_epel16_h6   d2,  d3,  d2,  d3,  d4,  q1,  q2

        vst1.8          {d2-d3},  [r0,:128], r1
        subs            r12, r12, #1
        bne             1b

        pop             {r4,pc}
endfunc

@ 16 wide, 6-tap horizontal then 6-tap vertical.
@ Pass 1 filters h+5 rows horizontally into a 16-byte aligned scratch
@ area on the stack (336 bytes = 21 rows of 16, plus 16 bytes of slack for
@ rounding the start address up). Pass 2 filters that scratch vertically.
function ff_put_vp8_epel16_h6v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ two rows up
        sub             r2,  r2,  #2            @ two bytes left
        push            {r4,lr}
        vpush           {d8-d15}

        @ first pass (horizontal):
        ldr             r4,  [sp, #64+8+4]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #64+8+0]          @ h
        add             r4,  lr,  r4, lsl #4
        sub             sp,  sp,  #336+16
        vld1.16         {q0},     [r4,:128]
        add             lr,  sp,  #15
        add             r12, r12, #5                @ vertical taps need 5 extra rows
        bic             lr,  lr,  #15               @ lr = scratch, 16-byte aligned
1:
        vld1.8          {d2,d3,d4}, [r2], r3

        vp8_epel16_h6   d2,  d3,  d2,  d3,  d4,  q1,  q2

        vst1.8          {d2-d3},  [lr,:128]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #336+16+64+8+8]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #336+16+64+8+0]   @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]
        bic             lr,  lr,  #15
2:
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6-d9},  [lr,:128]!
        vld1.8          {d10-d13}, [lr,:128]!
        vld1.8          {d14-d15}, [lr,:128]
        sub             lr,  lr,  #64               @ 96 forward, 64 back: net advance of 2 rows

        vp8_epel8_v6_y2 d2,  d4,  d2,  d4,  d6,  d8,  d10, d12, d14
        vp8_epel8_v6_y2 d3,  d5,  d3,  d5,  d7,  d9,  d11, d13, d15

        vst1.8          {d2-d3},  [r0,:128], r1
        vst1.8          {d4-d5},  [r0,:128], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #336+16
        vpop            {d8-d15}
        pop             {r4,pc}
endfunc

@ 8 wide, 6-tap vertical. Two output rows per iteration.
@ After the 8-byte push, h/mx/my sit at sp+8/12/16.
function ff_put_vp8_epel8_v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ start two rows above src
        push            {r4,lr}

        ldr             r4,  [sp, #16]          @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        vld1.8          {d2},     [r2], r3
        vld1.8          {d3},     [r2], r3
        vld1.8          {d4},     [r2], r3
        vld1.8          {d5},     [r2], r3
        vld1.8          {d6},     [r2], r3
        vld1.8          {d7},     [r2], r3
        vld1.8          {d28},    [r2]

        sub             r2,  r2,  r3,  lsl #2   @ net advance of 2 rows

        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d28

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             1b

        pop             {r4,pc}
endfunc

@ 8 wide, 6-tap horizontal. One output row per iteration.
@ Each row reads 16 source bytes starting 2 bytes left of src.
function ff_put_vp8_epel8_h6_neon, export=1
        sub             r2,  r2,  #2
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        vld1.8          {d2,d3},  [r2], r3

        vp8_epel8_h6    d2,  d2,  d3

        vst1.8          {d2},     [r0,:64], r1
        subs            r12, r12, #1
        bne             1b

        pop             {r4,pc}
endfunc

@ 8 wide, 6-tap horizontal then 6-tap vertical.
@ Pass 1 writes h+5 filtered rows of 8 bytes into an aligned stack scratch
@ area (168 bytes = 21 rows of 8, plus 16 bytes of alignment slack).
function ff_put_vp8_epel8_h6v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ two rows up
        sub             r2,  r2,  #2            @ two bytes left
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        sub             sp,  sp,  #168+16
        vld1.16         {q0},     [r4,:128]
        add             lr,  sp,  #15
        add             r12, r12, #5            @ vertical taps need 5 extra rows
        bic             lr,  lr,  #15           @ lr = scratch, 16-byte aligned
1:
        vld1.8          {d2,d3},  [r2], r3

        vp8_epel8_h6    d2,  d2,  d3

        vst1.8          {d2},     [lr,:64]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #168+16+16]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #168+16+8]    @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]
        bic             lr,  lr,  #15
2:
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6-d7},  [lr,:128]!
        vld1.8          {d30},    [lr,:64]
        sub             lr,  lr,  #32           @ 48 forward, 32 back: net advance of 2 rows

        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d30

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #168+16
        pop             {r4,pc}
endfunc

@ 8 wide, 4-tap vertical. Two output rows per iteration.
function ff_put_vp8_epel8_v4_neon, export=1
        sub             r2,  r2,  r3            @ start one row above src
        push            {r4,lr}

        ldr             r4,  [sp, #16]          @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        vld1.8          {d2},     [r2], r3
        vld1.8          {d3},     [r2], r3
        vld1.8          {d4},     [r2], r3
        vld1.8          {d5},     [r2], r3
        vld1.8          {d6},     [r2]
        sub             r2,  r2,  r3,  lsl #1   @ 4 rows forward, 2 back: net advance of 2 rows

        vp8_epel8_v4_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             1b

        pop             {r4,pc}
endfunc

@ 8 wide, 4-tap horizontal. One output row per iteration.
@ Each row reads 16 source bytes starting 1 byte left of src.
function ff_put_vp8_epel8_h4_neon, export=1
        sub             r2,  r2,  #1
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        vld1.8          {d2,d3},  [r2], r3

        vp8_epel8_h4    d2,  d2,  d3

        vst1.8          {d2},     [r0,:64], r1
        subs            r12, r12, #1
        bne             1b

        pop             {r4,pc}
endfunc

@ 8 wide, 4-tap horizontal then 4-tap vertical.
@ Pass 1 writes h+3 filtered rows into the aligned stack scratch area.
function ff_put_vp8_epel8_h4v4_neon, export=1
        sub             r2,  r2,  r3            @ one row up
        sub             r2,  r2,  #1            @ one byte left
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        sub             sp,  sp,  #168+16
        vld1.16         {q0},     [r4,:128]
        add             lr,  sp,  #15
        add             r12, r12, #3            @ vertical taps need 3 extra rows
        bic             lr,  lr,  #15           @ lr = scratch, 16-byte aligned
1:
        vld1.8          {d2,d3},  [r2], r3

        vp8_epel8_h4    d2,  d2,  d3

        vst1.8          {d2},     [lr,:64]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #168+16+16]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #168+16+8]    @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]
        bic             lr,  lr,  #15
2:
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6},     [lr,:64]
        sub             lr,  lr,  #16           @ 32 forward, 16 back: net advance of 2 rows

        vp8_epel8_v4_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #168+16
        pop             {r4,pc}
endfunc

@ 8 wide, 6-tap horizontal then 4-tap vertical.
@ Pass 1 writes h+3 filtered rows into the aligned stack scratch area.
function ff_put_vp8_epel8_h6v4_neon, export=1
        sub             r2,  r2,  r3            @ one row up
        sub             r2,  r2,  #2            @ two bytes left
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        sub             sp,  sp,  #168+16
        vld1.16         {q0},     [r4,:128]
        add             lr,  sp,  #15
        add             r12, r12, #3            @ vertical taps need 3 extra rows
        bic             lr,  lr,  #15           @ lr = scratch, 16-byte aligned
1:
        vld1.8          {d2,d3},  [r2], r3

        vp8_epel8_h6    d2,  d2,  d3

        vst1.8          {d2},     [lr,:64]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #168+16+16]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #168+16+8]    @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]
        bic             lr,  lr,  #15
2:
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6},     [lr,:64]
        sub             lr,  lr,  #16           @ net advance of 2 rows

        vp8_epel8_v4_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #168+16
        pop             {r4,pc}
endfunc

@ 8 wide, 4-tap horizontal then 6-tap vertical.
@ Pass 1 writes h+5 filtered rows into the aligned stack scratch area.
function ff_put_vp8_epel8_h4v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ two rows up
        sub             r2,  r2,  #1            @ one byte left
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        sub             sp,  sp,  #168+16
        vld1.16         {q0},     [r4,:128]
        add             lr,  sp,  #15
        add             r12, r12, #5            @ vertical taps need 5 extra rows
        bic             lr,  lr,  #15           @ lr = scratch, 16-byte aligned
1:
        vld1.8          {d2,d3},  [r2], r3

        vp8_epel8_h4    d2,  d2,  d3

        vst1.8          {d2},     [lr,:64]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #168+16+16]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #168+16+8]    @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]
        bic             lr,  lr,  #15
2:
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6-d7},  [lr,:128]!
        vld1.8          {d30},    [lr,:64]
        sub             lr,  lr,  #32           @ net advance of 2 rows

        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d30

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #168+16
        pop             {r4,pc}
endfunc

.ltorg

@ 4 wide, 6-tap vertical. Four output rows per iteration.
@ Two 4-byte rows share each d register: the low half holds row n and the
@ high half holds row n+2, so one 8-lane filter call yields rows 0 and 2
@ in d2 and rows 1 and 3 in d3.
function ff_put_vp8_epel4_v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ start two rows above src
        push            {r4,lr}

        ldr             r4,  [sp, #16]          @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        @ rows 0..6 into both halves (the high halves are replaced below)
        vld1.32         {d2[]},   [r2], r3
        vld1.32         {d3[]},   [r2], r3
        vld1.32         {d4[]},   [r2], r3
        vld1.32         {d5[]},   [r2], r3
        vld1.32         {d6[]},   [r2], r3
        vld1.32         {d7[]},   [r2], r3
        vld1.32         {d28[]},  [r2]
        sub             r2,  r2,  r3,  lsl #2   @ back to row 2
        @ rows 2..8 into the high halves
        vld1.32         {d2[1]},  [r2], r3
        vld1.32         {d3[1]},  [r2], r3
        vld1.32         {d4[1]},  [r2], r3
        vld1.32         {d5[1]},  [r2], r3
        vld1.32         {d6[1]},  [r2], r3
        vld1.32         {d7[1]},  [r2], r3
        vld1.32         {d28[1]}, [r2]
        sub             r2,  r2,  r3,  lsl #2   @ net advance of 4 rows per iteration

        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d28

        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             1b

        pop             {r4,pc}
endfunc

@ 4 wide, 6-tap horizontal. One output row per iteration.
@ Runs the 8-pixel filter on a 16-byte load and stores only the low
@ 4 result bytes.
function ff_put_vp8_epel4_h6_neon, export=1
        sub             r2,  r2,  #2
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        vld1.16         {q0},     [r4,:128]
1:
        vld1.8          {q1},     [r2], r3
        vp8_epel8_h6    d2,  d2,  d3
        vst1.32         {d2[0]},  [r0,:32], r1
        subs            r12, r12, #1
        bne             1b

        pop             {r4,pc}
endfunc

@ 4 wide, 6-tap horizontal then 6-tap vertical.
@ Pass 1 writes h+5 filtered rows of 4 bytes into an aligned stack scratch
@ area. Pass 2 produces four output rows per iteration, packing rows n and
@ n+2 into the two halves of each d register.
function ff_put_vp8_epel4_h6v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ two rows up
        sub             r2,  r2,  #2            @ two bytes left
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        sub             sp,  sp,  #52+16
        vld1.16         {q0},     [r4,:128]
        add             lr,  sp,  #15
        add             r12, r12, #5            @ vertical taps need 5 extra rows
        bic             lr,  lr,  #15           @ lr = scratch, 16-byte aligned
1:
        vld1.8          {q1},     [r2], r3
        vp8_epel8_h6    d2,  d2,  d3
        vst1.32         {d2[0]},  [lr,:32]!
        subs            r12, r12, #1
        bne             1b

        ldr             r4,  [sp, #52+16+16]    @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #52+16+8]     @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]
        bic             lr,  lr,  #15
2:
        vld1.8          {d2-d3},  [lr,:128]!    @ rows 0-3
        vld1.8          {d6},     [lr,:64]!     @ rows 4-5
        vld1.32         {d28[]},  [lr,:32]      @ row 6
        sub             lr,  lr,  #16
        vld1.8          {d4-d5},  [lr]!         @ rows 2-5
        vld1.8          {d7},     [lr,:64]!     @ rows 6-7
        vld1.32         {d28[1]}, [lr,:32]      @ row 8
        sub             lr,  lr,  #16           @ net advance of 4 rows per iteration
        vtrn.32         q1,  q2                 @ d2 = rows 0,2  d3 = rows 2,4  d4 = rows 1,3  d5 = rows 3,5
        vtrn.32         d6,  d7                 @ d6 = rows 4,6  d7 = rows 5,7
        vp8_epel8_v6_y2 d2,  d3,  d2,  d4,  d3,  d5,  d6,  d7,  d28
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             2b

        add             sp,  sp,  #52+16
        pop             {r4,pc}
endfunc

@ 4 wide, 4-tap horizontal then 6-tap vertical.
@ Pass 1 loads only 8 source bytes per row and passes the same register as
@ both filter inputs; only the low 4 result bytes are kept. Pass 2 is the
@ same row-pair packing scheme as the h6v6 variant.
function ff_put_vp8_epel4_h4v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ two rows up
        sub             r2,  r2,  #1            @ one byte left
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
        sub             sp,  sp,  #52+16
        vld1.16         {q0},     [r4,:128]
        add             lr,  sp,  #15
        add             r12, r12, #5            @ vertical taps need 5 extra rows
        bic             lr,  lr,  #15           @ lr = scratch, 16-byte aligned
1:
        vld1.8          {d2},     [r2], r3
        vp8_epel8_h4    d2,  d2,  d2
        vst1.32         {d2[0]},  [lr,:32]!
        subs            r12, r12, #1
        bne             1b

        ldr             r4,  [sp, #52+16+16]    @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #52+16+8]     @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]
        bic             lr,  lr,  #15
2:
        vld1.8          {d2-d3},  [lr,:128]!    @ rows 0-3
        vld1.8          {d6},     [lr,:64]!     @ rows 4-5
        vld1.32         {d28[]},  [lr,:32]      @ row 6
        sub             lr,  lr,  #16
        vld1.8          {d4-d5},  [lr]!         @ rows 2-5
        vld1.8          {d7},     [lr,:64]!     @ rows 6-7
        vld1.32         {d28[1]}, [lr,:32]      @ row 8
        sub             lr,  lr,  #16           @ net advance of 4 rows per iteration
        vtrn.32         q1,  q2                 @ d2 = rows 0,2  d3 = rows 2,4  d4 = rows 1,3  d5 = rows 3,5
        vtrn.32         d6,  d7                 @ d6 = rows 4,6  d7 = rows 5,7
        vp8_epel8_v6_y2 d2,  d3,  d2,  d4,  d3,  d5,  d6,  d7,  d28
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             2b

        add             sp,  sp,  #52+16
        pop             {r4,pc}
endfunc

function ff_put_vp8_epel4_h6v4_neon, export=1
        sub             r2,  r2,  r3
        sub             r2,  r2,  #2
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4
1408cabdff1aSopenharmony_ci sub sp, sp, #44+16 1409cabdff1aSopenharmony_ci vld1.16 {q0}, [r4,:128] 1410cabdff1aSopenharmony_ci add lr, sp, #15 1411cabdff1aSopenharmony_ci add r12, r12, #3 1412cabdff1aSopenharmony_ci bic lr, lr, #15 1413cabdff1aSopenharmony_ci1: 1414cabdff1aSopenharmony_ci vld1.8 {q1}, [r2], r3 1415cabdff1aSopenharmony_ci vp8_epel8_h6 d2, d2, d3 1416cabdff1aSopenharmony_ci vst1.32 {d2[0]}, [lr,:32]! 1417cabdff1aSopenharmony_ci subs r12, r12, #1 1418cabdff1aSopenharmony_ci bne 1b 1419cabdff1aSopenharmony_ci 1420cabdff1aSopenharmony_ci ldr r4, [sp, #44+16+16] @ my 1421cabdff1aSopenharmony_ci movrel lr, subpel_filters-16 1422cabdff1aSopenharmony_ci ldr r12, [sp, #44+16+8] @ h 1423cabdff1aSopenharmony_ci add r4, lr, r4, lsl #4 1424cabdff1aSopenharmony_ci add lr, sp, #15 1425cabdff1aSopenharmony_ci vld1.16 {q0}, [r4,:128] 1426cabdff1aSopenharmony_ci bic lr, lr, #15 1427cabdff1aSopenharmony_ci2: 1428cabdff1aSopenharmony_ci vld1.8 {d2-d3}, [lr,:128]! 1429cabdff1aSopenharmony_ci vld1.32 {d6[]}, [lr,:32] 1430cabdff1aSopenharmony_ci sub lr, lr, #8 1431cabdff1aSopenharmony_ci vld1.8 {d4-d5}, [lr]! 
1432cabdff1aSopenharmony_ci vld1.32 {d6[1]}, [lr,:32] 1433cabdff1aSopenharmony_ci sub lr, lr, #8 1434cabdff1aSopenharmony_ci vtrn.32 q1, q2 1435cabdff1aSopenharmony_ci vp8_epel8_v4_y2 d2, d3, d2, d4, d3, d5, d6 1436cabdff1aSopenharmony_ci vst1.32 {d2[0]}, [r0,:32], r1 1437cabdff1aSopenharmony_ci vst1.32 {d3[0]}, [r0,:32], r1 1438cabdff1aSopenharmony_ci vst1.32 {d2[1]}, [r0,:32], r1 1439cabdff1aSopenharmony_ci vst1.32 {d3[1]}, [r0,:32], r1 1440cabdff1aSopenharmony_ci subs r12, r12, #4 1441cabdff1aSopenharmony_ci bne 2b 1442cabdff1aSopenharmony_ci 1443cabdff1aSopenharmony_ci add sp, sp, #44+16 1444cabdff1aSopenharmony_ci pop {r4,pc} 1445cabdff1aSopenharmony_ciendfunc 1446cabdff1aSopenharmony_ci 1447cabdff1aSopenharmony_cifunction ff_put_vp8_epel4_h4_neon, export=1 1448cabdff1aSopenharmony_ci sub r2, r2, #1 1449cabdff1aSopenharmony_ci push {r4,lr} 1450cabdff1aSopenharmony_ci 1451cabdff1aSopenharmony_ci ldr r4, [sp, #12] @ mx 1452cabdff1aSopenharmony_ci movrel lr, subpel_filters-16 1453cabdff1aSopenharmony_ci ldr r12, [sp, #8] @ h 1454cabdff1aSopenharmony_ci add r4, lr, r4, lsl #4 1455cabdff1aSopenharmony_ci vld1.16 {q0}, [r4,:128] 1456cabdff1aSopenharmony_ci1: 1457cabdff1aSopenharmony_ci vld1.8 {d2}, [r2], r3 1458cabdff1aSopenharmony_ci vp8_epel8_h4 d2, d2, d2 1459cabdff1aSopenharmony_ci vst1.32 {d2[0]}, [r0,:32], r1 1460cabdff1aSopenharmony_ci subs r12, r12, #1 1461cabdff1aSopenharmony_ci bne 1b 1462cabdff1aSopenharmony_ci 1463cabdff1aSopenharmony_ci pop {r4,pc} 1464cabdff1aSopenharmony_ciendfunc 1465cabdff1aSopenharmony_ci 1466cabdff1aSopenharmony_cifunction ff_put_vp8_epel4_v4_neon, export=1 1467cabdff1aSopenharmony_ci sub r2, r2, r3 1468cabdff1aSopenharmony_ci push {r4,lr} 1469cabdff1aSopenharmony_ci 1470cabdff1aSopenharmony_ci ldr r4, [sp, #16] @ my 1471cabdff1aSopenharmony_ci movrel lr, subpel_filters-16 1472cabdff1aSopenharmony_ci ldr r12, [sp, #8] @ h 1473cabdff1aSopenharmony_ci add r4, lr, r4, lsl #4 1474cabdff1aSopenharmony_ci vld1.16 {q0}, [r4,:128] 
1475cabdff1aSopenharmony_ci1: 1476cabdff1aSopenharmony_ci vld1.32 {d2[]}, [r2], r3 1477cabdff1aSopenharmony_ci vld1.32 {d3[]}, [r2], r3 1478cabdff1aSopenharmony_ci vld1.32 {d4[]}, [r2], r3 1479cabdff1aSopenharmony_ci vld1.32 {d5[]}, [r2], r3 1480cabdff1aSopenharmony_ci vld1.32 {d6[]}, [r2] 1481cabdff1aSopenharmony_ci sub r2, r2, r3, lsl #1 1482cabdff1aSopenharmony_ci vld1.32 {d2[1]}, [r2], r3 1483cabdff1aSopenharmony_ci vld1.32 {d3[1]}, [r2], r3 1484cabdff1aSopenharmony_ci vld1.32 {d4[1]}, [r2], r3 1485cabdff1aSopenharmony_ci vld1.32 {d5[1]}, [r2], r3 1486cabdff1aSopenharmony_ci vld1.32 {d6[1]}, [r2] 1487cabdff1aSopenharmony_ci sub r2, r2, r3, lsl #1 1488cabdff1aSopenharmony_ci 1489cabdff1aSopenharmony_ci vp8_epel8_v4_y2 d2, d3, d2, d3, d4, d5, d6 1490cabdff1aSopenharmony_ci 1491cabdff1aSopenharmony_ci vst1.32 {d2[0]}, [r0,:32], r1 1492cabdff1aSopenharmony_ci vst1.32 {d3[0]}, [r0,:32], r1 1493cabdff1aSopenharmony_ci vst1.32 {d2[1]}, [r0,:32], r1 1494cabdff1aSopenharmony_ci vst1.32 {d3[1]}, [r0,:32], r1 1495cabdff1aSopenharmony_ci subs r12, r12, #4 1496cabdff1aSopenharmony_ci bne 1b 1497cabdff1aSopenharmony_ci 1498cabdff1aSopenharmony_ci pop {r4,pc} 1499cabdff1aSopenharmony_ciendfunc 1500cabdff1aSopenharmony_ci 1501cabdff1aSopenharmony_cifunction ff_put_vp8_epel4_h4v4_neon, export=1 1502cabdff1aSopenharmony_ci sub r2, r2, r3 1503cabdff1aSopenharmony_ci sub r2, r2, #1 1504cabdff1aSopenharmony_ci push {r4,lr} 1505cabdff1aSopenharmony_ci 1506cabdff1aSopenharmony_ci ldr r4, [sp, #12] @ mx 1507cabdff1aSopenharmony_ci movrel lr, subpel_filters-16 1508cabdff1aSopenharmony_ci ldr r12, [sp, #8] @ h 1509cabdff1aSopenharmony_ci add r4, lr, r4, lsl #4 1510cabdff1aSopenharmony_ci sub sp, sp, #44+16 1511cabdff1aSopenharmony_ci vld1.16 {q0}, [r4,:128] 1512cabdff1aSopenharmony_ci add lr, sp, #15 1513cabdff1aSopenharmony_ci add r12, r12, #3 1514cabdff1aSopenharmony_ci bic lr, lr, #15 1515cabdff1aSopenharmony_ci1: 1516cabdff1aSopenharmony_ci vld1.8 {d2}, [r2], r3 
1517cabdff1aSopenharmony_ci vp8_epel8_h4 d2, d2, d3 1518cabdff1aSopenharmony_ci vst1.32 {d2[0]}, [lr,:32]! 1519cabdff1aSopenharmony_ci subs r12, r12, #1 1520cabdff1aSopenharmony_ci bne 1b 1521cabdff1aSopenharmony_ci 1522cabdff1aSopenharmony_ci ldr r4, [sp, #44+16+16] @ my 1523cabdff1aSopenharmony_ci movrel lr, subpel_filters-16 1524cabdff1aSopenharmony_ci ldr r12, [sp, #44+16+8] @ h 1525cabdff1aSopenharmony_ci add r4, lr, r4, lsl #4 1526cabdff1aSopenharmony_ci add lr, sp, #15 1527cabdff1aSopenharmony_ci vld1.16 {q0}, [r4,:128] 1528cabdff1aSopenharmony_ci bic lr, lr, #15 1529cabdff1aSopenharmony_ci2: 1530cabdff1aSopenharmony_ci vld1.8 {d2-d3}, [lr,:128]! 1531cabdff1aSopenharmony_ci vld1.32 {d6[]}, [lr,:32] 1532cabdff1aSopenharmony_ci sub lr, lr, #8 1533cabdff1aSopenharmony_ci vld1.8 {d4-d5}, [lr]! 1534cabdff1aSopenharmony_ci vld1.32 {d6[1]}, [lr,:32] 1535cabdff1aSopenharmony_ci sub lr, lr, #8 1536cabdff1aSopenharmony_ci vtrn.32 q1, q2 1537cabdff1aSopenharmony_ci vp8_epel8_v4_y2 d2, d3, d2, d4, d3, d5, d6 1538cabdff1aSopenharmony_ci vst1.32 {d2[0]}, [r0,:32], r1 1539cabdff1aSopenharmony_ci vst1.32 {d3[0]}, [r0,:32], r1 1540cabdff1aSopenharmony_ci vst1.32 {d2[1]}, [r0,:32], r1 1541cabdff1aSopenharmony_ci vst1.32 {d3[1]}, [r0,:32], r1 1542cabdff1aSopenharmony_ci subs r12, r12, #4 1543cabdff1aSopenharmony_ci bne 2b 1544cabdff1aSopenharmony_ci 1545cabdff1aSopenharmony_ci add sp, sp, #44+16 1546cabdff1aSopenharmony_ci pop {r4,pc} 1547cabdff1aSopenharmony_ciendfunc 1548cabdff1aSopenharmony_ci 1549cabdff1aSopenharmony_ci@ note: worst case sum of all 6-tap filter values * 255 is 0x7f80 so 16 bit 1550cabdff1aSopenharmony_ci@ arithmetic can be used to apply filters 1551cabdff1aSopenharmony_ciconst subpel_filters, align=4 1552cabdff1aSopenharmony_ci .short 0, 6, 123, 12, 1, 0, 0, 0 1553cabdff1aSopenharmony_ci .short 2, 11, 108, 36, 8, 1, 0, 0 1554cabdff1aSopenharmony_ci .short 0, 9, 93, 50, 6, 0, 0, 0 1555cabdff1aSopenharmony_ci .short 3, 16, 77, 77, 16, 3, 0, 0 
1556cabdff1aSopenharmony_ci .short 0, 6, 50, 93, 9, 0, 0, 0 1557cabdff1aSopenharmony_ci .short 1, 8, 36, 108, 11, 2, 0, 0 1558cabdff1aSopenharmony_ci .short 0, 1, 12, 123, 6, 0, 0, 0 1559cabdff1aSopenharmony_ciendconst 1560cabdff1aSopenharmony_ci 1561cabdff1aSopenharmony_ci/* Bilinear MC */ 1562cabdff1aSopenharmony_ci 1563cabdff1aSopenharmony_cifunction ff_put_vp8_bilin16_h_neon, export=1 1564cabdff1aSopenharmony_ci ldr r12, [sp, #4] @ mx 1565cabdff1aSopenharmony_ci vdup.8 d0, r12 1566cabdff1aSopenharmony_ci rsb r12, r12, #8 1567cabdff1aSopenharmony_ci vdup.8 d1, r12 1568cabdff1aSopenharmony_ci ldr r12, [sp] @ h 1569cabdff1aSopenharmony_ci1: 1570cabdff1aSopenharmony_ci subs r12, r12, #2 1571cabdff1aSopenharmony_ci vld1.8 {d2-d4}, [r2], r3 1572cabdff1aSopenharmony_ci vext.8 q2, q1, q2, #1 1573cabdff1aSopenharmony_ci vmull.u8 q8, d2, d1 1574cabdff1aSopenharmony_ci vmlal.u8 q8, d4, d0 1575cabdff1aSopenharmony_ci vld1.8 {d18-d20},[r2], r3 1576cabdff1aSopenharmony_ci vmull.u8 q3, d3, d1 1577cabdff1aSopenharmony_ci vmlal.u8 q3, d5, d0 1578cabdff1aSopenharmony_ci vext.8 q10, q9, q10, #1 1579cabdff1aSopenharmony_ci vmull.u8 q11, d18, d1 1580cabdff1aSopenharmony_ci vmlal.u8 q11, d20, d0 1581cabdff1aSopenharmony_ci vmull.u8 q12, d19, d1 1582cabdff1aSopenharmony_ci vmlal.u8 q12, d21, d0 1583cabdff1aSopenharmony_ci vrshrn.u16 d4, q8, #3 1584cabdff1aSopenharmony_ci vrshrn.u16 d5, q3, #3 1585cabdff1aSopenharmony_ci vrshrn.u16 d6, q11, #3 1586cabdff1aSopenharmony_ci vrshrn.u16 d7, q12, #3 1587cabdff1aSopenharmony_ci vst1.8 {q2}, [r0,:128], r1 1588cabdff1aSopenharmony_ci vst1.8 {q3}, [r0,:128], r1 1589cabdff1aSopenharmony_ci bgt 1b 1590cabdff1aSopenharmony_ci 1591cabdff1aSopenharmony_ci bx lr 1592cabdff1aSopenharmony_ciendfunc 1593cabdff1aSopenharmony_ci 1594cabdff1aSopenharmony_cifunction ff_put_vp8_bilin16_v_neon, export=1 1595cabdff1aSopenharmony_ci ldr r12, [sp, #8] @ my 1596cabdff1aSopenharmony_ci vdup.8 d0, r12 1597cabdff1aSopenharmony_ci rsb r12, r12, #8 
1598cabdff1aSopenharmony_ci vdup.8 d1, r12 1599cabdff1aSopenharmony_ci ldr r12, [sp] @ h 1600cabdff1aSopenharmony_ci vld1.8 {q1}, [r2], r3 1601cabdff1aSopenharmony_ci1: 1602cabdff1aSopenharmony_ci subs r12, r12, #2 1603cabdff1aSopenharmony_ci vld1.8 {q2}, [r2], r3 1604cabdff1aSopenharmony_ci vmull.u8 q3, d2, d1 1605cabdff1aSopenharmony_ci vmlal.u8 q3, d4, d0 1606cabdff1aSopenharmony_ci vmull.u8 q8, d3, d1 1607cabdff1aSopenharmony_ci vmlal.u8 q8, d5, d0 1608cabdff1aSopenharmony_ci vld1.8 {q1}, [r2], r3 1609cabdff1aSopenharmony_ci vmull.u8 q9, d4, d1 1610cabdff1aSopenharmony_ci vmlal.u8 q9, d2, d0 1611cabdff1aSopenharmony_ci vmull.u8 q10, d5, d1 1612cabdff1aSopenharmony_ci vmlal.u8 q10, d3, d0 1613cabdff1aSopenharmony_ci vrshrn.u16 d4, q3, #3 1614cabdff1aSopenharmony_ci vrshrn.u16 d5, q8, #3 1615cabdff1aSopenharmony_ci vrshrn.u16 d6, q9, #3 1616cabdff1aSopenharmony_ci vrshrn.u16 d7, q10, #3 1617cabdff1aSopenharmony_ci vst1.8 {q2}, [r0,:128], r1 1618cabdff1aSopenharmony_ci vst1.8 {q3}, [r0,:128], r1 1619cabdff1aSopenharmony_ci bgt 1b 1620cabdff1aSopenharmony_ci 1621cabdff1aSopenharmony_ci bx lr 1622cabdff1aSopenharmony_ciendfunc 1623cabdff1aSopenharmony_ci 1624cabdff1aSopenharmony_cifunction ff_put_vp8_bilin16_hv_neon, export=1 1625cabdff1aSopenharmony_ci ldr r12, [sp, #4] @ mx 1626cabdff1aSopenharmony_ci vdup.8 d0, r12 1627cabdff1aSopenharmony_ci rsb r12, r12, #8 1628cabdff1aSopenharmony_ci vdup.8 d1, r12 1629cabdff1aSopenharmony_ci ldr r12, [sp, #8] @ my 1630cabdff1aSopenharmony_ci vdup.8 d2, r12 1631cabdff1aSopenharmony_ci rsb r12, r12, #8 1632cabdff1aSopenharmony_ci vdup.8 d3, r12 1633cabdff1aSopenharmony_ci ldr r12, [sp] @ h 1634cabdff1aSopenharmony_ci 1635cabdff1aSopenharmony_ci vld1.8 {d4-d6}, [r2], r3 1636cabdff1aSopenharmony_ci vext.8 q3, q2, q3, #1 1637cabdff1aSopenharmony_ci vmull.u8 q8, d4, d1 1638cabdff1aSopenharmony_ci vmlal.u8 q8, d6, d0 1639cabdff1aSopenharmony_ci vmull.u8 q9, d5, d1 1640cabdff1aSopenharmony_ci vmlal.u8 q9, d7, d0 
1641cabdff1aSopenharmony_ci vrshrn.u16 d4, q8, #3 1642cabdff1aSopenharmony_ci vrshrn.u16 d5, q9, #3 1643cabdff1aSopenharmony_ci1: 1644cabdff1aSopenharmony_ci subs r12, r12, #2 1645cabdff1aSopenharmony_ci vld1.8 {d18-d20},[r2], r3 1646cabdff1aSopenharmony_ci vext.8 q10, q9, q10, #1 1647cabdff1aSopenharmony_ci vmull.u8 q11, d18, d1 1648cabdff1aSopenharmony_ci vmlal.u8 q11, d20, d0 1649cabdff1aSopenharmony_ci vld1.8 {d26-d28},[r2], r3 1650cabdff1aSopenharmony_ci vmull.u8 q12, d19, d1 1651cabdff1aSopenharmony_ci vmlal.u8 q12, d21, d0 1652cabdff1aSopenharmony_ci vext.8 q14, q13, q14, #1 1653cabdff1aSopenharmony_ci vmull.u8 q8, d26, d1 1654cabdff1aSopenharmony_ci vmlal.u8 q8, d28, d0 1655cabdff1aSopenharmony_ci vmull.u8 q9, d27, d1 1656cabdff1aSopenharmony_ci vmlal.u8 q9, d29, d0 1657cabdff1aSopenharmony_ci vrshrn.u16 d6, q11, #3 1658cabdff1aSopenharmony_ci vrshrn.u16 d7, q12, #3 1659cabdff1aSopenharmony_ci vmull.u8 q12, d4, d3 1660cabdff1aSopenharmony_ci vmlal.u8 q12, d6, d2 1661cabdff1aSopenharmony_ci vmull.u8 q15, d5, d3 1662cabdff1aSopenharmony_ci vmlal.u8 q15, d7, d2 1663cabdff1aSopenharmony_ci vrshrn.u16 d4, q8, #3 1664cabdff1aSopenharmony_ci vrshrn.u16 d5, q9, #3 1665cabdff1aSopenharmony_ci vmull.u8 q10, d6, d3 1666cabdff1aSopenharmony_ci vmlal.u8 q10, d4, d2 1667cabdff1aSopenharmony_ci vmull.u8 q11, d7, d3 1668cabdff1aSopenharmony_ci vmlal.u8 q11, d5, d2 1669cabdff1aSopenharmony_ci vrshrn.u16 d24, q12, #3 1670cabdff1aSopenharmony_ci vrshrn.u16 d25, q15, #3 1671cabdff1aSopenharmony_ci vst1.8 {q12}, [r0,:128], r1 1672cabdff1aSopenharmony_ci vrshrn.u16 d20, q10, #3 1673cabdff1aSopenharmony_ci vrshrn.u16 d21, q11, #3 1674cabdff1aSopenharmony_ci vst1.8 {q10}, [r0,:128], r1 1675cabdff1aSopenharmony_ci bgt 1b 1676cabdff1aSopenharmony_ci 1677cabdff1aSopenharmony_ci bx lr 1678cabdff1aSopenharmony_ciendfunc 1679cabdff1aSopenharmony_ci 1680cabdff1aSopenharmony_cifunction ff_put_vp8_bilin8_h_neon, export=1 1681cabdff1aSopenharmony_ci ldr r12, [sp, #4] @ mx 
1682cabdff1aSopenharmony_ci vdup.8 d0, r12 1683cabdff1aSopenharmony_ci rsb r12, r12, #8 1684cabdff1aSopenharmony_ci vdup.8 d1, r12 1685cabdff1aSopenharmony_ci ldr r12, [sp] @ h 1686cabdff1aSopenharmony_ci1: 1687cabdff1aSopenharmony_ci subs r12, r12, #2 1688cabdff1aSopenharmony_ci vld1.8 {q1}, [r2], r3 1689cabdff1aSopenharmony_ci vext.8 d3, d2, d3, #1 1690cabdff1aSopenharmony_ci vmull.u8 q2, d2, d1 1691cabdff1aSopenharmony_ci vmlal.u8 q2, d3, d0 1692cabdff1aSopenharmony_ci vld1.8 {q3}, [r2], r3 1693cabdff1aSopenharmony_ci vext.8 d7, d6, d7, #1 1694cabdff1aSopenharmony_ci vmull.u8 q8, d6, d1 1695cabdff1aSopenharmony_ci vmlal.u8 q8, d7, d0 1696cabdff1aSopenharmony_ci vrshrn.u16 d4, q2, #3 1697cabdff1aSopenharmony_ci vrshrn.u16 d16, q8, #3 1698cabdff1aSopenharmony_ci vst1.8 {d4}, [r0,:64], r1 1699cabdff1aSopenharmony_ci vst1.8 {d16}, [r0,:64], r1 1700cabdff1aSopenharmony_ci bgt 1b 1701cabdff1aSopenharmony_ci 1702cabdff1aSopenharmony_ci bx lr 1703cabdff1aSopenharmony_ciendfunc 1704cabdff1aSopenharmony_ci 1705cabdff1aSopenharmony_cifunction ff_put_vp8_bilin8_v_neon, export=1 1706cabdff1aSopenharmony_ci ldr r12, [sp, #8] @ my 1707cabdff1aSopenharmony_ci vdup.8 d0, r12 1708cabdff1aSopenharmony_ci rsb r12, r12, #8 1709cabdff1aSopenharmony_ci vdup.8 d1, r12 1710cabdff1aSopenharmony_ci ldr r12, [sp] @ h 1711cabdff1aSopenharmony_ci vld1.8 {d2}, [r2], r3 1712cabdff1aSopenharmony_ci1: 1713cabdff1aSopenharmony_ci subs r12, r12, #2 1714cabdff1aSopenharmony_ci vld1.8 {d3}, [r2], r3 1715cabdff1aSopenharmony_ci vmull.u8 q2, d2, d1 1716cabdff1aSopenharmony_ci vmlal.u8 q2, d3, d0 1717cabdff1aSopenharmony_ci vld1.8 {d2}, [r2], r3 1718cabdff1aSopenharmony_ci vmull.u8 q3, d3, d1 1719cabdff1aSopenharmony_ci vmlal.u8 q3, d2, d0 1720cabdff1aSopenharmony_ci vrshrn.u16 d4, q2, #3 1721cabdff1aSopenharmony_ci vrshrn.u16 d6, q3, #3 1722cabdff1aSopenharmony_ci vst1.8 {d4}, [r0,:64], r1 1723cabdff1aSopenharmony_ci vst1.8 {d6}, [r0,:64], r1 1724cabdff1aSopenharmony_ci bgt 1b 
1725cabdff1aSopenharmony_ci 1726cabdff1aSopenharmony_ci bx lr 1727cabdff1aSopenharmony_ciendfunc 1728cabdff1aSopenharmony_ci 1729cabdff1aSopenharmony_cifunction ff_put_vp8_bilin8_hv_neon, export=1 1730cabdff1aSopenharmony_ci ldr r12, [sp, #4] @ mx 1731cabdff1aSopenharmony_ci vdup.8 d0, r12 1732cabdff1aSopenharmony_ci rsb r12, r12, #8 1733cabdff1aSopenharmony_ci vdup.8 d1, r12 1734cabdff1aSopenharmony_ci ldr r12, [sp, #8] @ my 1735cabdff1aSopenharmony_ci vdup.8 d2, r12 1736cabdff1aSopenharmony_ci rsb r12, r12, #8 1737cabdff1aSopenharmony_ci vdup.8 d3, r12 1738cabdff1aSopenharmony_ci ldr r12, [sp] @ h 1739cabdff1aSopenharmony_ci 1740cabdff1aSopenharmony_ci vld1.8 {q2}, [r2], r3 1741cabdff1aSopenharmony_ci vext.8 d5, d4, d5, #1 1742cabdff1aSopenharmony_ci vmull.u8 q9, d4, d1 1743cabdff1aSopenharmony_ci vmlal.u8 q9, d5, d0 1744cabdff1aSopenharmony_ci vrshrn.u16 d22, q9, #3 1745cabdff1aSopenharmony_ci1: 1746cabdff1aSopenharmony_ci subs r12, r12, #2 1747cabdff1aSopenharmony_ci vld1.8 {q3}, [r2], r3 1748cabdff1aSopenharmony_ci vext.8 d7, d6, d7, #1 1749cabdff1aSopenharmony_ci vmull.u8 q8, d6, d1 1750cabdff1aSopenharmony_ci vmlal.u8 q8, d7, d0 1751cabdff1aSopenharmony_ci vld1.8 {q2}, [r2], r3 1752cabdff1aSopenharmony_ci vext.8 d5, d4, d5, #1 1753cabdff1aSopenharmony_ci vmull.u8 q9, d4, d1 1754cabdff1aSopenharmony_ci vmlal.u8 q9, d5, d0 1755cabdff1aSopenharmony_ci vrshrn.u16 d16, q8, #3 1756cabdff1aSopenharmony_ci vmull.u8 q10, d22, d3 1757cabdff1aSopenharmony_ci vmlal.u8 q10, d16, d2 1758cabdff1aSopenharmony_ci vrshrn.u16 d22, q9, #3 1759cabdff1aSopenharmony_ci vmull.u8 q12, d16, d3 1760cabdff1aSopenharmony_ci vmlal.u8 q12, d22, d2 1761cabdff1aSopenharmony_ci vrshrn.u16 d20, q10, #3 1762cabdff1aSopenharmony_ci vst1.8 {d20}, [r0,:64], r1 1763cabdff1aSopenharmony_ci vrshrn.u16 d23, q12, #3 1764cabdff1aSopenharmony_ci vst1.8 {d23}, [r0,:64], r1 1765cabdff1aSopenharmony_ci bgt 1b 1766cabdff1aSopenharmony_ci 1767cabdff1aSopenharmony_ci bx lr 1768cabdff1aSopenharmony_ciendfunc 

@ ---------------------------------------------------------------------------
@ ff_put_vp8_bilin4_h_neon
@ 4-pixel-wide horizontal bilinear prediction, two rows per iteration:
@   dst[x] = (src[x] * (8 - mx) + src[x + 1] * mx + 4) >> 3
@ Both rows are packed into one d register pair so a single multiply and
@ accumulate handles them together.
@ In:    r0 = dst (stores carry a 32-bit alignment hint), r1 = dst stride,
@        r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx
@ Notes: h is expected to be even (counter drops by 2, loop runs while > 0).
@        Each row load fetches 8 source bytes.  The flags set by subs at the
@        loop top are consumed by bgt at the bottom; no instruction in
@        between writes them.
@ Uses:  r12, q0-q3.
@ ---------------------------------------------------------------------------
function ff_put_vp8_bilin4_h_neon, export=1
        ldr             r12, [sp, #4]           @ mx
        vdup.8          d0,  r12                @ d0 = mx
        rsb             r12, r12, #8
        vdup.8          d1,  r12                @ d1 = 8 - mx
        ldr             r12, [sp]               @ h
1:
        subs            r12, r12, #2
        vld1.8          {d2},     [r2], r3
        vext.8          d3,  d2,  d3,  #1       @ d3 = row A shifted left by 1 pixel
        vld1.8          {d6},     [r2], r3
        vext.8          d7,  d6,  d7,  #1       @ d7 = row B shifted left by 1 pixel
        vtrn.32         q1,  q3                 @ d2 = {A, B}, d3 = {A << 1px, B << 1px}
        vmull.u8        q2,  d2,  d1
        vmlal.u8        q2,  d3,  d0
        vrshrn.u16      d4,  q2,  #3            @ (sum + 4) >> 3, narrowed to 8 bit
        vst1.32         {d4[0]},  [r0,:32], r1
        vst1.32         {d4[1]},  [r0,:32], r1
        bgt             1b

        bx              lr
endfunc

@ ---------------------------------------------------------------------------
@ ff_put_vp8_bilin4_v_neon
@ 4-pixel-wide vertical bilinear prediction, two rows per iteration:
@   dst[y][x] = (src[y][x] * (8 - my) + src[y + 1][x] * my + 4) >> 3
@ In:    r0 = dst (stores carry a 32-bit alignment hint), r1 = dst stride,
@        r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #8] = my
@ Notes: h is expected to be even (counter drops by 2, loop runs while > 0).
@        h + 1 source rows are read, 4 bytes each.
@ Uses:  r12, q0-q2.
@ ---------------------------------------------------------------------------
function ff_put_vp8_bilin4_v_neon, export=1
        ldr             r12, [sp, #8]           @ my
        vdup.8          d0,  r12                @ d0 = my
        rsb             r12, r12, #8
        vdup.8          d1,  r12                @ d1 = 8 - my
        ldr             r12, [sp]               @ h
        vld1.32         {d2[]},   [r2], r3      @ d2[0] = first source row
1:
        vld1.32         {d3[]},   [r2]          @ d3[0] = row n+1 (pointer not advanced)
        vld1.32         {d2[1]},  [r2], r3      @ d2 = {row n,   row n+1}
        vld1.32         {d3[1]},  [r2], r3      @ d3 = {row n+1, row n+2}
        vmull.u8        q2,  d2,  d1
        vmlal.u8        q2,  d3,  d0
        vtrn.32         d3,  d2                 @ move row n+2 into d2[0] for next pass
        vrshrn.u16      d4,  q2,  #3            @ (sum + 4) >> 3, narrowed to 8 bit
        vst1.32         {d4[0]},  [r0,:32], r1
        vst1.32         {d4[1]},  [r0,:32], r1
        subs            r12, r12, #2
        bgt             1b

        bx              lr
endfunc

@ ---------------------------------------------------------------------------
@ ff_put_vp8_bilin4_hv_neon
@ 4-pixel-wide bilinear prediction in both directions, two rows per
@ iteration.  Every source row is first blended horizontally with weights
@ (8 - mx, mx) and rounded to 8 bit; adjacent blended rows are then blended
@ vertically with weights (8 - my, my) and rounded again.
@ In:    r0 = dst (stores carry a 32-bit alignment hint), r1 = dst stride,
@        r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h is expected to be even (counter drops by 2, loop runs while > 0).
@        h + 1 source rows are read, 8 bytes each.  The 1-pixel shift uses
@        vext with the same register twice (a byte rotate); only the low 4
@        bytes of the first blended row and of each blended row pair half
@        reach the output, so the wrapped byte is never stored.
@ Uses:  r12, q0-q3, q8-q11.
@ ---------------------------------------------------------------------------
function ff_put_vp8_bilin4_hv_neon, export=1
        ldr             r12, [sp, #4]           @ mx
        vdup.8          d0,  r12                @ d0 = mx
        rsb             r12, r12, #8
        vdup.8          d1,  r12                @ d1 = 8 - mx
        ldr             r12, [sp, #8]           @ my
        vdup.8          d2,  r12                @ d2 = my
        rsb             r12, r12, #8
        vdup.8          d3,  r12                @ d3 = 8 - my
        ldr             r12, [sp]               @ h

        @ Horizontally blend the first source row into d22[0].
        vld1.8          {d4},     [r2], r3
        vext.8          d5,  d4,  d4,  #1
        vmull.u8        q9,  d4,  d1
        vmlal.u8        q9,  d5,  d0
        vrshrn.u16      d22, q9,  #3
1:
        subs            r12, r12, #2
        vld1.8          {d6},     [r2], r3
        vext.8          d7,  d6,  d6,  #1
        vld1.8          {d4},     [r2], r3
        vext.8          d5,  d4,  d4,  #1
        vtrn.32         q3,  q2                 @ d6 = {A, B}, d7 = {A << 1px, B << 1px}
        vmull.u8        q8,  d6,  d1
        vmlal.u8        q8,  d7,  d0
        vrshrn.u16      d16, q8,  #3            @ d16 = {blended A, blended B}
        vmull.u8        q10, d16, d2            @ lower rows weighted by my
        vtrn.32         d22, d16                @ d22 = {previous, blended A}
        vmlal.u8        q10, d22, d3            @ upper rows weighted by 8 - my
        vrev64.32       d22, d16                @ d22[0] = blended B for next pass
        vrshrn.u16      d20, q10, #3
        vst1.32         {d20[0]}, [r0,:32], r1
        vst1.32         {d20[1]}, [r0,:32], r1
        bgt             1b

        bx              lr
endfunc