/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Target: 32-bit ARM with NEON, GNU as syntax, run through the C
 * preprocessor. The first four arguments arrive in r0-r3, the rest on the
 * stack. d8-d15 (q4-q7) are callee-saved and are preserved with vpush/vpop
 * wherever they are used.
 * The function/endfunc, transpose_8x8 and transpose_4x4 macros come from the
 * two include files below.
 */

#include "libavutil/arm/asm.S"
#include "neon.S"

        /* H.264 loop filter */

/*
 * Common entry sequence of the loop filter functions.
 *
 * In:    r2   = alpha, r3 = beta (thresholds used by the filter macros)
 *        [sp] = pointer to four bytes that are loaded as one word
 *               (presumably the per-edge tc0 values, confirm against caller)
 * Out:   d24[0] = the four loaded bytes
 * Clobb: r12, flags
 *
 * Returns to the caller with bx lr when alpha or beta is zero, and again when
 * the LT condition holds after the final ANDS. Bit 31 of that ANDS result is
 * the AND of the top bits of all four loaded bytes.
 * It reads [sp] and returns with bx lr, so it has to be expanded before
 * anything is pushed on the stack.
 *
 * NOTE(review): LT tests N != V, and TST/ANDS do not write V on AArch32, so
 * the second early exit depends on the V flag at function entry. Confirm
 * whether MI was intended.
 */
.macro  h264_loop_filter_start
        ldr             r12, [sp]
        tst             r2,  r2
        ldr             r12, [r12]
        it              ne
        tstne           r3,  r3                 @ Z set if alpha == 0 or beta == 0
        vmov.32         d24[0], r12
        and             r12, r12, r12, lsl #16
        it              eq
        bxeq            lr
        ands            r12, r12, r12, lsl #8   @ top byte = AND of all four bytes
        it              lt
        bxlt            lr
.endm

/*
 * Filter one luma edge, 16 pixels wide.
 *
 * In:    q10 = p2, q9 = p1, q8 = p0, q0 = q0, q1 = q1, q2 = q2
 *        r2  = alpha, r3 = beta
 *        d24[0] = four bytes from h264_loop_filter_start, called tc0 below
 * Out:   q4 = filtered p1, q8 = filtered p0, q0 = filtered q0,
 *        q5 = filtered q1
 * Clobb: q2, q6, q7, q10-q12, q14, q15
 *
 * q4-q7 are callee-saved, so callers wrap this macro in vpush/vpop {d8-d15}.
 */
.macro  h264_loop_filter_luma
        vdup.8          q11, r2         @ alpha
        vmovl.u8        q12, d24
        vabd.u8         q6,  q8,  q0    @ abs(p0 - q0)
        vmovl.u16       q12, d24
        vabd.u8         q14, q9,  q8    @ abs(p1 - p0)
        vsli.16         q12, q12, #8
        vabd.u8         q15, q1,  q0    @ abs(q1 - q0)
        vsli.32         q12, q12, #16   @ each tc0 byte now fills four lanes
        vclt.u8         q6,  q6,  q11   @ < alpha
        vdup.8          q11, r3         @ beta
        vclt.s8         q7,  q12, #0    @ lanes with negative tc0
        vclt.u8         q14, q14, q11   @ < beta
        vclt.u8         q15, q15, q11   @ < beta
        vbic            q6,  q6,  q7    @ drop lanes with negative tc0
        vabd.u8         q4,  q10, q8    @ abs(p2 - p0)
        vand            q6,  q6,  q14
        vabd.u8         q5,  q2,  q0    @ abs(q2 - q0)
        vclt.u8         q4,  q4,  q11   @ < beta
        vand            q6,  q6,  q15   @ q6 = mask of lanes to filter
        vclt.u8         q5,  q5,  q11   @ < beta
        vand            q4,  q4,  q6    @ q4 = mask of lanes where p1 changes
        vand            q5,  q5,  q6    @ q5 = mask of lanes where q1 changes
        vand            q12, q12, q6    @ tc0, zeroed outside the filter mask
        vrhadd.u8       q14, q8,  q0    @ (p0 + q0 + 1) >> 1
        vsub.i8         q6,  q12, q4    @ masks are -1, so this adds 1 to tc0
        vqadd.u8        q7,  q9,  q12   @ p1 + tc0
        vhadd.u8        q10, q10, q14
        vsub.i8         q6,  q6,  q5    @ q6 = clip limit for the p0/q0 delta
        vhadd.u8        q14, q2,  q14
        vmin.u8         q7,  q7,  q10
        vqsub.u8        q11, q9,  q12   @ p1 - tc0
        vqadd.u8        q2,  q1,  q12   @ q1 + tc0
        vmax.u8         q7,  q7,  q11   @ p1 candidate within p1 +/- tc0
        vqsub.u8        q11, q1,  q12   @ q1 - tc0
        vmin.u8         q14, q2,  q14
        vmovl.u8        q2,  d0
        vmax.u8         q14, q14, q11   @ q1 candidate within q1 +/- tc0
        vmovl.u8        q10, d1
        vsubw.u8        q2,  q2,  d16   @ q0 - p0, 16-bit
        vsubw.u8        q10, q10, d17
        vshl.i16        q2,  q2,  #2
        vshl.i16        q10, q10, #2
        vaddw.u8        q2,  q2,  d18   @ + p1
        vaddw.u8        q10, q10, d19
        vsubw.u8        q2,  q2,  d2    @ - q1
        vsubw.u8        q10, q10, d3
        vrshrn.i16      d4,  q2,  #3    @ rounding shift, narrowed to 8 bits
        vrshrn.i16      d5,  q10, #3
        vbsl            q4,  q7,  q9    @ select p1 candidate or original p1
        vbsl            q5,  q14, q1    @ select q1 candidate or original q1
        vneg.s8         q7,  q6
        vmovl.u8        q14, d16
        vmin.s8         q2,  q2,  q6    @ clip delta to the limit in q6
        vmovl.u8        q6,  d17
        vmax.s8         q2,  q2,  q7
        vmovl.u8        q11, d0
        vmovl.u8        q12, d1
        vaddw.s8        q14, q14, d4    @ p0 + delta
        vaddw.s8        q6,  q6,  d5
        vsubw.s8        q11, q11, d4    @ q0 - delta
        vsubw.s8        q12, q12, d5
        vqmovun.s16     d16, q14        @ saturate back to unsigned 8 bits
        vqmovun.s16     d17, q6
        vqmovun.s16     d0,  q11
        vqmovun.s16     d1,  q12
.endm

/*
 * Luma filter across a horizontal edge (rows above and below r0).
 *
 * In:    r0 = address of the first row below the edge (the q0 row),
 *             16-byte aligned as required by the :128 hints
 *        r1 = row stride in bytes
 *        r2, r3, [sp] as described for h264_loop_filter_start
 * Reads three rows on each side of the edge and rewrites the two rows on
 * each side that are nearest to it.
 */
function ff_h264_v_loop_filter_luma_neon, export=1
        h264_loop_filter_start

        vld1.8          {d0, d1},  [r0,:128], r1        @ q0
        vld1.8          {d2, d3},  [r0,:128], r1        @ q1
        vld1.8          {d4, d5},  [r0,:128], r1        @ q2
        sub             r0,  r0,  r1, lsl #2
        sub             r0,  r0,  r1, lsl #1            @ back six rows to p2
        vld1.8          {d20,d21}, [r0,:128], r1        @ p2
        vld1.8          {d18,d19}, [r0,:128], r1        @ p1
        vld1.8          {d16,d17}, [r0,:128], r1        @ p0

        vpush           {d8-d15}

        h264_loop_filter_luma

        sub             r0,  r0,  r1, lsl #1            @ back to the p1 row
        vst1.8          {d8, d9},  [r0,:128], r1        @ p1
        vst1.8          {d16,d17}, [r0,:128], r1        @ p0
        vst1.8          {d0, d1},  [r0,:128], r1        @ q0
        vst1.8          {d10,d11}, [r0,:128]            @ q1

        vpop            {d8-d15}
        bx              lr
endfunc

/*
 * Luma filter across a vertical edge (columns left and right of r0).
 *
 * In:    r0 = address of the first pixel right of the edge in the first row
 *        r1 = row stride in bytes
 *        r2, r3, [sp] as described for h264_loop_filter_start
 * Loads 8 bytes starting at r0 - 4 from each of 16 rows, transposes them
 * with transpose_8x8 (from neon.S) into the register layout expected by
 * h264_loop_filter_luma, and writes 4 bytes starting at r0 - 2 back to each
 * of the 16 rows.
 */
function ff_h264_h_loop_filter_luma_neon, export=1
        h264_loop_filter_start

        sub             r0,  r0,  #4
        vld1.8          {d6},  [r0], r1
        vld1.8          {d20}, [r0], r1
        vld1.8          {d18}, [r0], r1
        vld1.8          {d16}, [r0], r1
        vld1.8          {d0},  [r0], r1
        vld1.8          {d2},  [r0], r1
        vld1.8          {d4},  [r0], r1
        vld1.8          {d26}, [r0], r1
        vld1.8          {d7},  [r0], r1
        vld1.8          {d21}, [r0], r1
        vld1.8          {d19}, [r0], r1
        vld1.8          {d17}, [r0], r1
        vld1.8          {d1},  [r0], r1
        vld1.8          {d3},  [r0], r1
        vld1.8          {d5},  [r0], r1
        vld1.8          {d27}, [r0], r1

        transpose_8x8   q3, q10, q9, q8, q0, q1, q2, q13

        vpush           {d8-d15}

        h264_loop_filter_luma

        transpose_4x4   q4, q8, q0, q5

        sub             r0,  r0,  r1, lsl #4            @ back to the first row
        add             r0,  r0,  #2                    @ skip the two unchanged columns
        vst1.32         {d8[0]},  [r0], r1
        vst1.32         {d16[0]}, [r0], r1
        vst1.32         {d0[0]},  [r0], r1
        vst1.32         {d10[0]}, [r0], r1
        vst1.32         {d8[1]},  [r0], r1
        vst1.32         {d16[1]}, [r0], r1
        vst1.32         {d0[1]},  [r0], r1
        vst1.32         {d10[1]}, [r0], r1
        vst1.32         {d9[0]},  [r0], r1
        vst1.32         {d17[0]}, [r0], r1
        vst1.32         {d1[0]},  [r0], r1
        vst1.32         {d11[0]}, [r0], r1
        vst1.32         {d9[1]},  [r0], r1
        vst1.32         {d17[1]}, [r0], r1
        vst1.32         {d1[1]},  [r0], r1
        vst1.32         {d11[1]}, [r0], r1

        vpop            {d8-d15}
        bx              lr
endfunc

/*
 * Filter one chroma edge, 8 pixels wide.
 *
 * In:    d18 = p1, d16 = p0, d0 = q0, d2 = q1
 *        r2  = alpha, r3 = beta
 *        d24[0] = four bytes from h264_loop_filter_start, used as the clip
 *                 limit of the delta
 * Out:   d16 = filtered p0, d0 = filtered q0
 * Clobb: q2, q11, q12, d26, q14, d30
 *
 * No callee-saved NEON register is touched, so callers need no vpush.
 */
.macro  h264_loop_filter_chroma
        vdup.8          d22, r2         @ alpha
        vmovl.u8        q12, d24
        vabd.u8         d26, d16, d0    @ abs(p0 - q0)
        vmovl.u8        q2,  d0
        vabd.u8         d28, d18, d16   @ abs(p1 - p0)
        vsubw.u8        q2,  q2,  d16   @ q0 - p0, 16-bit
        vsli.16         d24, d24, #8    @ each limit byte now fills two lanes
        vshl.i16        q2,  q2,  #2
        vabd.u8         d30, d2,  d0    @ abs(q1 - q0)
        vaddw.u8        q2,  q2,  d18   @ + p1
        vclt.u8         d26, d26, d22   @ < alpha
        vsubw.u8        q2,  q2,  d2    @ - q1
        vdup.8          d22, r3         @ beta
        vrshrn.i16      d4,  q2,  #3    @ rounding shift, narrowed to 8 bits
        vclt.u8         d28, d28, d22   @ < beta
        vclt.u8         d30, d30, d22   @ < beta
        vmin.s8         d4,  d4,  d24   @ clip delta to +/- limit
        vneg.s8         d25, d24
        vand            d26, d26, d28
        vmax.s8         d4,  d4,  d25
        vand            d26, d26, d30   @ d26 = mask of lanes to filter
        vmovl.u8        q11, d0
        vand            d4,  d4,  d26   @ zero delta outside the mask
        vmovl.u8        q14, d16
        vaddw.s8        q14, q14, d4    @ p0 + delta
        vsubw.s8        q11, q11, d4    @ q0 - delta
        vqmovun.s16     d16, q14        @ saturate back to unsigned 8 bits
        vqmovun.s16     d0,  q11
.endm

/*
 * Chroma filter across a horizontal edge.
 *
 * In:    r0 = address of the first row below the edge (the q0 row),
 *             8-byte aligned as required by the :64 hints
 *        r1 = row stride in bytes
 *        r2, r3, [sp] as described for h264_loop_filter_start
 * Reads two rows on each side of the edge and rewrites the row on each side
 * that is nearest to it.
 */
function ff_h264_v_loop_filter_chroma_neon, export=1
        h264_loop_filter_start

        sub             r0,  r0,  r1, lsl #1
        vld1.8          {d18}, [r0,:64], r1             @ p1
        vld1.8          {d16}, [r0,:64], r1             @ p0
        vld1.8          {d0},  [r0,:64], r1             @ q0
        vld1.8          {d2},  [r0,:64]                 @ q1

        h264_loop_filter_chroma

        sub             r0,  r0,  r1, lsl #1            @ back to the p0 row
        vst1.8          {d16}, [r0,:64], r1
        vst1.8          {d0},  [r0,:64], r1

        bx              lr
endfunc

/*
 * Chroma filter across a vertical edge, 8 rows.
 *
 * In:    r0 = address of the first pixel right of the edge in the first row
 *        r1 = row stride in bytes
 *        r2, r3, [sp] as described for h264_loop_filter_start
 * Loads 4 bytes starting at r0 - 2 from each of 8 rows, transposes them with
 * vtrn, filters, transposes back and rewrites the same 4 bytes of each row.
 *
 * The label h_loop_filter_chroma420 is a second entry point used by the 4:2:2
 * variant with bl. It expects r0 already moved left by 2, d24[0] loaded, and
 * returns with bx lr.
 */
function ff_h264_h_loop_filter_chroma_neon, export=1
        h264_loop_filter_start

        sub             r0,  r0,  #2
h_loop_filter_chroma420:
        vld1.32         {d18[0]}, [r0], r1
        vld1.32         {d16[0]}, [r0], r1
        vld1.32         {d0[0]},  [r0], r1
        vld1.32         {d2[0]},  [r0], r1
        vld1.32         {d18[1]}, [r0], r1
        vld1.32         {d16[1]}, [r0], r1
        vld1.32         {d0[1]},  [r0], r1
        vld1.32         {d2[1]},  [r0], r1

        vtrn.16         d18, d0
        vtrn.16         d16, d2
        vtrn.8          d18, d16
        vtrn.8          d0,  d2

        h264_loop_filter_chroma

        vtrn.16         d18, d0
        vtrn.16         d16, d2
        vtrn.8          d18, d16
        vtrn.8          d0,  d2

        sub             r0,  r0,  r1, lsl #3            @ back to the first row
        vst1.32         {d18[0]}, [r0], r1
        vst1.32         {d16[0]}, [r0], r1
        vst1.32         {d0[0]},  [r0], r1
        vst1.32         {d2[0]},  [r0], r1
        vst1.32         {d18[1]}, [r0], r1
        vst1.32         {d16[1]}, [r0], r1
        vst1.32         {d0[1]},  [r0], r1
        vst1.32         {d2[1]},  [r0], r1

        bx              lr
endfunc

/*
 * Chroma filter across a vertical edge, 16 rows (4:2:2 variant).
 *
 * Same arguments as ff_h264_h_loop_filter_chroma_neon. The 8-row body is run
 * twice with a doubled stride: first on the rows starting at r0, then on the
 * rows starting one original stride further down (kept in r4).
 */
function ff_h264_h_loop_filter_chroma422_neon, export=1
        h264_loop_filter_start
        push            {r4, lr}
        add             r4,  r0,  r1                    @ start of the second pass
        add             r1,  r1,  r1                    @ step two rows at a time
        sub             r0,  r0,  #2

        bl              h_loop_filter_chroma420

        @ The filter macro overwrote q12, so reload the four limit bytes.
        @ The stack argument sits 8 bytes higher after the push above.
        ldr             r12, [sp, #8]
        ldr             r12, [r12]
        vmov.32         d24[0], r12
        sub             r0,  r4,  #2

        bl              h_loop_filter_chroma420
        pop             {r4, pc}
endfunc

@ Biweighted prediction

/*
 * Loop bodies of the biweight functions for widths 16, 8 and 4.
 *
 * Params: macs = multiply-accumulate instruction applied to the r1 rows
 *         macd = multiply-accumulate instruction applied to the r0 rows
 * In:     r0 = first input, r1 = second input, r6 = output pointer,
 *         r2 = stride, r3 = rows left
 *         r4 = magnitude of the weight for r0 rows, r5 = same for r1 rows
 *         q8 = initial accumulator value, q9 = per-lane shift for vshl
 * Each macro ends the enclosing function with pop {r4-r6, pc}.
 * biweight_16 and biweight_8 handle two rows per iteration. biweight_4
 * handles four rows per iteration, with a two-row exit at label 2.
 */
.macro  biweight_16     macs, macd
        vdup.8          d0,  r4
        vdup.8          d1,  r5
        vmov            q2,  q8
        vmov            q3,  q8
1:      subs            r3,  r3,  #2
        vld1.8          {d20-d21},[r0,:128], r2
        \macd           q2,  d0,  d20
        pld             [r0]
        \macd           q3,  d0,  d21
        vld1.8          {d22-d23},[r1,:128], r2
        \macs           q2,  d1,  d22
        pld             [r1]
        \macs           q3,  d1,  d23
        vmov            q12, q8
        vld1.8          {d28-d29},[r0,:128], r2
        vmov            q13, q8
        \macd           q12, d0,  d28
        pld             [r0]
        \macd           q13, d0,  d29
        vld1.8          {d30-d31},[r1,:128], r2
        \macs           q12, d1,  d30
        pld             [r1]
        \macs           q13, d1,  d31
        vshl.s16        q2,  q2,  q9
        vshl.s16        q3,  q3,  q9
        vqmovun.s16     d4,  q2
        vqmovun.s16     d5,  q3
        vshl.s16        q12, q12, q9
        vshl.s16        q13, q13, q9
        vqmovun.s16     d24, q12
        vqmovun.s16     d25, q13
        vmov            q3,  q8                 @ re-seed for the next iteration
        vst1.8          {d4- d5}, [r6,:128], r2
        vmov            q2,  q8
        vst1.8          {d24-d25},[r6,:128], r2
        bne             1b
        pop             {r4-r6, pc}
.endm

.macro  biweight_8      macs, macd
        vdup.8          d0,  r4
        vdup.8          d1,  r5
        vmov            q1,  q8
        vmov            q10, q8
1:      subs            r3,  r3,  #2
        vld1.8          {d4},[r0,:64], r2
        \macd           q1,  d0,  d4
        pld             [r0]
        vld1.8          {d5},[r1,:64], r2
        \macs           q1,  d1,  d5
        pld             [r1]
        vld1.8          {d6},[r0,:64], r2
        \macd           q10, d0,  d6
        pld             [r0]
        vld1.8          {d7},[r1,:64], r2
        \macs           q10, d1,  d7
        pld             [r1]
        vshl.s16        q1,  q1,  q9
        vqmovun.s16     d2,  q1
        vshl.s16        q10, q10, q9
        vqmovun.s16     d4,  q10
        vmov            q10, q8                 @ re-seed for the next iteration
        vst1.8          {d2},[r6,:64], r2
        vmov            q1,  q8
        vst1.8          {d4},[r6,:64], r2
        bne             1b
        pop             {r4-r6, pc}
.endm

.macro  biweight_4      macs, macd
        vdup.8          d0,  r4
        vdup.8          d1,  r5
        vmov            q1,  q8
        vmov            q10, q8
1:      subs            r3,  r3,  #4
        vld1.32         {d4[0]},[r0,:32], r2
        vld1.32         {d4[1]},[r0,:32], r2
        \macd           q1,  d0,  d4
        pld             [r0]
        vld1.32         {d5[0]},[r1,:32], r2
        vld1.32         {d5[1]},[r1,:32], r2
        \macs           q1,  d1,  d5
        pld             [r1]
        blt             2f                      @ only two rows were left
        vld1.32         {d6[0]},[r0,:32], r2
        vld1.32         {d6[1]},[r0,:32], r2
        \macd           q10, d0,  d6
        pld             [r0]
        vld1.32         {d7[0]},[r1,:32], r2
        vld1.32         {d7[1]},[r1,:32], r2
        \macs           q10, d1,  d7
        pld             [r1]
        vshl.s16        q1,  q1,  q9
        vqmovun.s16     d2,  q1
        vshl.s16        q10, q10, q9
        vqmovun.s16     d4,  q10
        vmov            q10, q8                 @ re-seed for the next iteration
        vst1.32         {d2[0]},[r6,:32], r2
        vst1.32         {d2[1]},[r6,:32], r2
        vmov            q1,  q8
        vst1.32         {d4[0]},[r6,:32], r2
        vst1.32         {d4[1]},[r6,:32], r2
        bne             1b
        pop             {r4-r6, pc}
2:      vshl.s16        q1,  q1,  q9
        vqmovun.s16     d2,  q1
        vst1.32         {d2[0]},[r6,:32], r2
        vst1.32         {d2[1]},[r6,:32], r2
        pop             {r4-r6, pc}
.endm

/*
 * Define ff_biweight_h264_pixels_<w>_neon for w = 16, 8 or 4.
 *
 * In:    r0 = first input, also the output (copied to r6 for the stores)
 *        r1 = second input
 *        r2 = row stride in bytes, r3 = number of rows
 *        [sp]      = shift (presumably log2_denom, confirm against caller)
 *        [sp, #4]  = weight applied to the r0 rows
 *        [sp, #8]  = weight applied to the r1 rows
 *        [sp, #12] = offset
 *
 * Accumulators start at ((offset + 1) | 1) << shift and the weighted sum is
 * shifted right by shift + 1 (q9 holds the bitwise NOT of shift, used as a
 * negative vshl count) before saturating to 8 bits.
 *
 * The multiplies are unsigned, so the signs of the two weights select one of
 * four loop variants. Negative weights are negated and paired with vmlsl
 * instead of vmlal. The selector is (r4 >> 31) ^ (r5 >> 30):
 *   0 = both non-negative, 1 = only r4 negative, 2 = both negative,
 *   anything else = only r5 negative.
 * This assumes bit 30 of r5 equals its sign bit - TODO confirm the weight
 * range against the caller.
 */
.macro  biweight_func   w
function ff_biweight_h264_pixels_\w\()_neon, export=1
        push            {r4-r6, lr}
        ldr             r12, [sp, #16]
        add             r4,  sp,  #20
        ldm             r4,  {r4-r6}
        lsr             lr,  r4,  #31
        add             r6,  r6,  #1
        eors            lr,  lr,  r5,  lsr #30
        orr             r6,  r6,  #1
        vdup.16         q9,  r12
        lsl             r6,  r6,  r12
        vmvn            q9,  q9
        vdup.16         q8,  r6
        mov             r6,  r0
        beq             10f
        subs            lr,  lr,  #1
        beq             20f
        subs            lr,  lr,  #1
        beq             30f
        b               40f
10:     biweight_\w     vmlal.u8, vmlal.u8
20:     rsb             r4,  r4,  #0
        biweight_\w     vmlal.u8, vmlsl.u8
30:     rsb             r4,  r4,  #0
        rsb             r5,  r5,  #0
        biweight_\w     vmlsl.u8, vmlsl.u8
40:     rsb             r5,  r5,  #0
        biweight_\w     vmlsl.u8, vmlal.u8
endfunc
.endm

        biweight_func   16
        biweight_func   8
        biweight_func   4

@ Weighted prediction

/*
 * Loop bodies of the weight functions for widths 16, 8 and 4.
 *
 * Param: add = instruction that combines q8 with the product, called as
 *              add dst, q8, product
 * In:    r0 = input pointer, r4 = output pointer, r1 = stride,
 *        r2 = rows left, r12 = magnitude of the weight,
 *        q8 = offset term, q9 = per-lane shift for vrshl
 * Each macro ends the enclosing function with pop {r4, pc}.
 * weight_16 and weight_8 handle two rows per iteration. weight_4 handles
 * four rows per iteration, with a two-row exit at label 2.
 */
.macro  weight_16       add
        vdup.8          d0,  r12
1:      subs            r2,  r2,  #2
        vld1.8          {d20-d21},[r0,:128], r1
        vmull.u8        q2,  d0,  d20
        pld             [r0]
        vmull.u8        q3,  d0,  d21
        vld1.8          {d28-d29},[r0,:128], r1
        vmull.u8        q12, d0,  d28
        pld             [r0]
        vmull.u8        q13, d0,  d29
        \add            q2,  q8,  q2
        vrshl.s16       q2,  q2,  q9
        \add            q3,  q8,  q3
        vrshl.s16       q3,  q3,  q9
        vqmovun.s16     d4,  q2
        vqmovun.s16     d5,  q3
        \add            q12, q8,  q12
        vrshl.s16       q12, q12, q9
        \add            q13, q8,  q13
        vrshl.s16       q13, q13, q9
        vqmovun.s16     d24, q12
        vqmovun.s16     d25, q13
        vst1.8          {d4- d5}, [r4,:128], r1
        vst1.8          {d24-d25},[r4,:128], r1
        bne             1b
        pop             {r4, pc}
.endm

.macro  weight_8        add
        vdup.8          d0,  r12
1:      subs            r2,  r2,  #2
        vld1.8          {d4},[r0,:64], r1
        vmull.u8        q1,  d0,  d4
        pld             [r0]
        vld1.8          {d6},[r0,:64], r1
        vmull.u8        q10, d0,  d6
        \add            q1,  q8,  q1
        pld             [r0]
        vrshl.s16       q1,  q1,  q9
        vqmovun.s16     d2,  q1
        \add            q10, q8,  q10
        vrshl.s16       q10, q10, q9
        vqmovun.s16     d4,  q10
        vst1.8          {d2},[r4,:64], r1
        vst1.8          {d4},[r4,:64], r1
        bne             1b
        pop             {r4, pc}
.endm

.macro  weight_4        add
        vdup.8          d0,  r12
        vmov            q1,  q8
        vmov            q10, q8
1:      subs            r2,  r2,  #4
        vld1.32         {d4[0]},[r0,:32], r1
        vld1.32         {d4[1]},[r0,:32], r1
        vmull.u8        q1,  d0,  d4
        pld             [r0]
        blt             2f                      @ only two rows were left
        vld1.32         {d6[0]},[r0,:32], r1
        vld1.32         {d6[1]},[r0,:32], r1
        vmull.u8        q10, d0,  d6
        pld             [r0]
        \add            q1,  q8,  q1
        vrshl.s16       q1,  q1,  q9
        vqmovun.s16     d2,  q1
        \add            q10, q8,  q10
        vrshl.s16       q10, q10, q9
        vqmovun.s16     d4,  q10
        vmov            q10, q8
        vst1.32         {d2[0]},[r4,:32], r1
        vst1.32         {d2[1]},[r4,:32], r1
        vmov            q1,  q8
        vst1.32         {d4[0]},[r4,:32], r1
        vst1.32         {d4[1]},[r4,:32], r1
        bne             1b
        pop             {r4, pc}
2:      \add            q1,  q8,  q1
        vrshl.s16       q1,  q1,  q9
        vqmovun.s16     d2,  q1
        vst1.32         {d2[0]},[r4,:32], r1
        vst1.32         {d2[1]},[r4,:32], r1
        pop             {r4, pc}
.endm

/*
 * Define ff_weight_h264_pixels_<w>_neon for w = 16, 8 or 4.
 *
 * In:    r0 = block, read and rewritten in place (copied to r4 for stores)
 *        r1 = row stride in bytes, r2 = number of rows
 *        r3 = shift (presumably log2_denom, confirm against caller)
 *        [sp]     = weight
 *        [sp, #4] = offset
 *
 * q8 holds offset << shift. When shift > 1 the offset and the product are
 * combined with a halving add or subtract and then shifted right by
 * shift - 1 with rounding. Otherwise a plain add or subtract is followed by
 * a rounding right shift by shift. The multiply is unsigned, so a negative
 * weight is negated and the subtracting variant is used.
 * The numeric label 10 is defined twice, and each blt 10f reaches the next
 * definition that follows it.
 */
.macro  weight_func     w
function ff_weight_h264_pixels_\w\()_neon, export=1
        push            {r4, lr}
        ldr             r12, [sp, #8]
        ldr             r4,  [sp, #12]
        cmp             r3,  #1
        lsl             r4,  r4,  r3
        vdup.16         q8,  r4
        mov             r4,  r0
        ble             20f
        rsb             lr,  r3,  #1
        vdup.16         q9,  lr
        cmp             r12, #0
        blt             10f
        weight_\w       vhadd.s16
10:     rsb             r12, r12, #0
        weight_\w       vhsub.s16
20:     rsb             lr,  r3,  #0
        vdup.16         q9,  lr
        cmp             r12, #0
        blt             10f
        weight_\w       vadd.s16
10:     rsb             r12, r12, #0
        weight_\w       vsub.s16
endfunc
.endm

        weight_func     16
        weight_func     8
        weight_func     4