/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config_components.h"

#include "libavutil/arm/asm.S"

/*
 * Notes that apply to every kernel in this file.
 *
 * Syntax: GNU as, ARM/NEON, preprocessed by cpp.  The helpers function,
 * endfunc, const, endconst, movrel, ldrh_post, strh_post and the A/T line
 * prefixes are not defined here; they presumably come from
 * libavutil/arm/asm.S (A/T appear to select the ARM-mode and Thumb-mode
 * variants of a line) - confirm against that header.
 *
 * Arguments: r0 = dst, r1 = src, r2 = stride, r3 = h, x and y on the stack.
 *
 * Bilinear weights derived from x and y:
 *     w00 = (8 - x) * (8 - y)      w01 = x * (8 - y)
 *     w10 = (8 - x) * y            w11 = x * y
 * and each output pixel is
 *     (w00*s[0] + w01*s[1] + w10*s[stride] + w11*s[stride + 1] + bias) >> 6
 *
 * Every loop emits two rows per iteration and repeats while the decremented
 * row count is still greater than zero, so an odd h produces one extra row.
 */

/*
 * chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
 *
 * 8-pixel-wide chroma interpolation.
 *
 * Macro parameters:
 *   type   put - store the interpolated rows to dst
 *          avg - rounding-average the interpolated rows with the bytes
 *                already in dst (read through lr), then store
 *   codec  h264 (default) - rounding narrowing shift by 6, no explicit bias
 *          rv40           - bias read from rv40bias, truncating shift by 6
 *          vc1            - constant bias 28, truncating shift by 6
 *
 * Register roles after the prologue:
 *   r4  = x, later w00         r5  = y
 *   r6  = w10                  r7  = w11 (x * y)
 *   r12 = w01                  lr  = dst read pointer (avg only)
 *   q11 = bias in every 16-bit lane (rv40 and vc1 only)
 *
 * Four code paths are selected from the weights:
 *   x*y != 0        -> full 2-D filter            (loop 1)
 *   x == 0, y != 0  -> vertical 2-tap filter      (loop 3)
 *   x != 0, y == 0  -> horizontal 2-tap filter    (loop 4)
 *   x == 0, y == 0  -> scale by w00 only          (loop 5)
 *
 * dst accesses carry a 64-bit alignment hint.  The paths with horizontal
 * taps load 16 bytes per source row and use the first 9 of them.  The paths
 * with vertical taps read one source row more than they write.
 */
.macro  h264_chroma_mc8 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
        push            {r4-r7, lr}
        ldrd            r4,  r5,  [sp, #20]     @ r4 = x, r5 = y (20 bytes were just pushed)
  .ifc \type,avg
        mov             lr,  r0                 @ separate read pointer into dst
  .endif
        pld             [r1]
        pld             [r1, r2]

  .ifc \codec,rv40
        movrel          r6,  rv40bias
        lsr             r7,  r5,  #1
        add             r6,  r6,  r7,  lsl #3   @ table row (y >> 1), 8 bytes per row
        lsr             r7,  r4,  #1
        add             r6,  r6,  r7,  lsl #1   @ table column (x >> 1), 2 bytes per entry
        vld1.16         {d22[],d23[]}, [r6,:16] @ replicate the bias into all lanes of q11
  .endif
  .ifc \codec,vc1
        vmov.u16        q11, #28
  .endif

        /* The flags set from x*y here are consumed by the beq below; the
         * arithmetic in between does not update them. */
A       muls            r7,  r4,  r5
T       mul             r7,  r4,  r5
T       cmp             r7,  #0
        rsb             r6,  r7,  r5,  lsl #3   @ r6  = 8*y - x*y           = w10
        rsb             r12, r7,  r4,  lsl #3   @ r12 = 8*x - x*y           = w01
        sub             r4,  r7,  r4,  lsl #3
        sub             r4,  r4,  r5,  lsl #3
        add             r4,  r4,  #64           @ r4  = 64 - 8*x - 8*y + x*y = w00

        beq             2f                      @ x*y == 0: at most one filter direction

        /* Full 2-D filter.  d4/d5 hold the current top row and the same row
         * shifted left by one pixel. */
        vdup.8          d0,  r4
        vdup.8          d1,  r12
        vld1.8          {d4, d5}, [r1], r2
        vdup.8          d2,  r6
        vdup.8          d3,  r7
        vext.8          d5,  d4,  d5,  #1

1:      vld1.8          {d6, d7}, [r1], r2
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d5,  d1
        vext.8          d7,  d6,  d7,  #1
        vld1.8          {d4, d5}, [r1], r2      @ becomes the top row of the next iteration
        vmlal.u8        q8,  d6,  d2
        pld             [r1]
        vext.8          d5,  d4,  d5,  #1
        vmlal.u8        q8,  d7,  d3
        vmull.u8        q9,  d6,  d0
        subs            r3,  r3,  #2            @ flags are tested by the bgt at the loop end
        vmlal.u8        q9,  d7,  d1
        vmlal.u8        q9,  d4,  d2
        vmlal.u8        q9,  d5,  d3
        pld             [r1, r2]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             1b

        pop             {r4-r7, pc}

        /* x*y == 0, so at most one of w01 and w10 is non-zero and their sum
         * is the single second tap. */
2:      adds            r12, r12, r6
        vdup.8          d0,  r4
        beq             5f                      @ both zero: no filtering at all
        tst             r6,  r6                 @ w10 == 0 means horizontal only
        vdup.8          d1,  r12

        beq             4f

        /* Vertical 2-tap filter. */
        vld1.8          {d4}, [r1], r2

3:      vld1.8          {d6}, [r1], r2
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d6,  d1
        vld1.8          {d4}, [r1], r2          @ becomes the top row of the next iteration
        vmull.u8        q9,  d6,  d0
        vmlal.u8        q9,  d4,  d1
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
        pld             [r1, r2]
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        subs            r3,  r3,  #2
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             3b

        pop             {r4-r7, pc}

        /* Horizontal 2-tap filter: each row is combined with itself shifted
         * left by one pixel. */
4:      vld1.8          {d4, d5}, [r1], r2
        vld1.8          {d6, d7}, [r1], r2
        vext.8          d5,  d4,  d5,  #1
        vext.8          d7,  d6,  d7,  #1
        pld             [r1]
        subs            r3,  r3,  #2
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d5,  d1
        vmull.u8        q9,  d6,  d0
        vmlal.u8        q9,  d7,  d1
        pld             [r1, r2]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             4b

        pop             {r4-r7, pc}

        /* x == 0 and y == 0: multiply by w00 (64 here) and shift back down,
         * which still applies the codec bias and rounding mode. */
5:      vld1.8          {d4}, [r1], r2
        vld1.8          {d5}, [r1], r2
        pld             [r1]
        subs            r3,  r3,  #2
        vmull.u8        q8,  d4,  d0
        vmull.u8        q9,  d5,  d0
        pld             [r1, r2]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             5b

        pop             {r4-r7, pc}
endfunc
.endm

/*
 * chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
 *
 * 4-pixel-wide chroma interpolation.  Macro parameters, weight computation
 * and the four-way path selection are the same as in h264_chroma_mc8.
 *
 * Lane packing: a 64-bit register holds two 4-pixel groups.  In the paths
 * with horizontal taps, vext + vtrn.32 place a row in the low half and the
 * same row shifted by one pixel in the high half, while the weight register
 * holds the matching weight in each half.  One vmull therefore yields both
 * tap products, and vadd.i16 of the two result halves sums them.  The two
 * output rows end up in d16/d17 and are narrowed together into d16.
 *
 * dst accesses carry a 32-bit alignment hint.  The paths with horizontal
 * taps load 8 bytes per source row and use the first 5 of them.
 */
.macro  h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
        push            {r4-r7, lr}
        ldrd            r4,  r5,  [sp, #20]     @ r4 = x, r5 = y
  .ifc \type,avg
        mov             lr,  r0                 @ separate read pointer into dst
  .endif
        pld             [r1]
        pld             [r1, r2]

  .ifc \codec,rv40
        movrel          r6,  rv40bias
        lsr             r7,  r5,  #1
        add             r6,  r6,  r7,  lsl #3   @ table row (y >> 1), 8 bytes per row
        lsr             r7,  r4,  #1
        add             r6,  r6,  r7,  lsl #1   @ table column (x >> 1), 2 bytes per entry
        vld1.16         {d22[],d23[]}, [r6,:16] @ replicate the bias into all lanes of q11
  .endif
  .ifc \codec,vc1
        vmov.u16        q11, #28
  .endif

        /* The flags set from x*y here are consumed by the beq below; the
         * arithmetic in between does not update them. */
A       muls            r7,  r4,  r5
T       mul             r7,  r4,  r5
T       cmp             r7,  #0
        rsb             r6,  r7,  r5,  lsl #3   @ r6  = w10
        rsb             r12, r7,  r4,  lsl #3   @ r12 = w01
        sub             r4,  r7,  r4,  lsl #3
        sub             r4,  r4,  r5,  lsl #3
        add             r4,  r4,  #64           @ r4  = w00

        beq             2f                      @ x*y == 0: at most one filter direction

        /* Full 2-D filter. */
        vdup.8          d0,  r4
        vdup.8          d1,  r12
        vld1.8          {d4},     [r1], r2
        vdup.8          d2,  r6
        vdup.8          d3,  r7

        vext.8          d5,  d4,  d5,  #1       @ only the low 4 bytes of d5 are used below
        vtrn.32         d4,  d5                 @ d4 = row | row shifted by one pixel

        vtrn.32         d0,  d1                 @ d0 = w00 | w01
        vtrn.32         d2,  d3                 @ d2 = w10 | w11

1:      vld1.8          {d6},     [r1], r2
        vext.8          d7,  d6,  d7,  #1
        vtrn.32         d6,  d7
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d6,  d2
        vld1.8          {d4},     [r1], r2      @ becomes the top row of the next iteration
        vext.8          d5,  d4,  d5,  #1
        vtrn.32         d4,  d5
        pld             [r1]
        vmull.u8        q9,  d6,  d0
        vmlal.u8        q9,  d4,  d2
        vadd.i16        d16, d16, d17           @ first output row: sum of both halves
        vadd.i16        d17, d18, d19           @ second output row
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
        subs            r3,  r3,  #2
        pld             [r1, r2]
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             1b

        pop             {r4-r7, pc}

        /* x*y == 0, so at most one of w01 and w10 is non-zero and their sum
         * is the single second tap. */
2:      adds            r12, r12, r6
        vdup.8          d0,  r4
        beq             5f                      @ both zero: no filtering at all
        tst             r6,  r6                 @ w10 == 0 means horizontal only
        vdup.8          d1,  r12
        vtrn.32         d0,  d1                 @ d0 = w00 | second tap

        beq             4f

        /* Vertical 2-tap filter.  d4 holds two consecutive rows; the halves
         * are refilled alternately, so the weight order is swapped in d1. */
        vext.32         d1,  d0,  d1,  #1       @ d1 = second tap | w00
        vld1.32         {d4[0]},  [r1], r2

3:      vld1.32         {d4[1]},  [r1], r2
        vmull.u8        q8,  d4,  d0
        vld1.32         {d4[0]},  [r1], r2
        vmull.u8        q9,  d4,  d1
        vadd.i16        d16, d16, d17
        vadd.i16        d17, d18, d19
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        subs            r3,  r3,  #2
        pld             [r1, r2]
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             3b

        pop             {r4-r7, pc}

        /* Horizontal 2-tap filter. */
4:      vld1.8          {d4},     [r1], r2
        vld1.8          {d6},     [r1], r2
        vext.8          d5,  d4,  d5,  #1
        vext.8          d7,  d6,  d7,  #1
        vtrn.32         d4,  d5
        vtrn.32         d6,  d7
        vmull.u8        q8,  d4,  d0
        vmull.u8        q9,  d6,  d0
        subs            r3,  r3,  #2
        vadd.i16        d16, d16, d17
        vadd.i16        d17, d18, d19
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        pld             [r1, r2]                @ second row of the next pair, as in the other loops
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             4b

        pop             {r4-r7, pc}

        /* x == 0 and y == 0: both rows share d4 and are scaled by w00 only. */
5:      vld1.32         {d4[0]},  [r1], r2
        vld1.32         {d4[1]},  [r1], r2
        vmull.u8        q8,  d4,  d0
        subs            r3,  r3,  #2
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        pld             [r1, r2]                @ second row of the next pair, as in the other loops
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             5b

        pop             {r4-r7, pc}
endfunc
.endm

/*
 * 2-pixel-wide chroma interpolation.  Same argument order as chroma_mc8 and
 * chroma_mc4: r0 = dst, r1 = src, r2 = stride, r3 = h, x and y on the stack.
 *
 * Macro parameter:
 *   type   put - store the result; avg - rounding-average it with dst first
 * There is no codec parameter; rounding is always the h264 form
 * (vrshrn by 6).
 *
 * Only two paths exist: x == 0 and y == 0 copies (or averages) two bytes per
 * row, everything else goes through the full 2-D filter.
 *
 * Register roles in the filter path:
 *   r4 = x, later w00     lr = y     r5 = w11     r6 = w10     r12 = w01
 * After vtrn.16 the weight registers hold, per byte,
 *   d0 = w00 w00 w01 w01 (twice)     d1 = w10 w10 w11 w11 (twice)
 *
 * dst accesses carry a 16-bit alignment hint.  The filter loop loads 4 bytes
 * per source row and uses the first 3, and reads one row more than it writes.
 */
.macro  h264_chroma_mc2 type
function ff_\type\()_h264_chroma_mc2_neon, export=1
        push            {r4-r6, lr}
        ldr             r4,  [sp, #16]          @ r4 = x (16 bytes were just pushed)
        ldr             lr,  [sp, #20]          @ lr = y
        pld             [r1]
        pld             [r1, r2]
        orrs            r5,  r4,  lr
        beq             2f                      @ x == 0 and y == 0

        mul             r5,  r4,  lr            @ r5  = w11
        rsb             r6,  r5,  lr, lsl #3    @ r6  = w10
        rsb             r12, r5,  r4, lsl #3    @ r12 = w01
        sub             r4,  r5,  r4, lsl #3
        sub             r4,  r4,  lr, lsl #3
        add             r4,  r4,  #64           @ r4  = w00
        vdup.8          d0,  r4
        vdup.8          d2,  r12
        vdup.8          d1,  r6
        vdup.8          d3,  r5
        vtrn.16         q0,  q1                 @ interleave the weights in 16-bit pairs
1:
        vld1.32         {d4[0]},  [r1], r2      @ row n
        vld1.32         {d4[1]},  [r1], r2      @ row n+1
        vrev64.32       d5,  d4
        vld1.32         {d5[1]},  [r1]          @ row n+2; r1 is not advanced, so it is row n next time
        vext.8          q3,  q2,  q2,  #1       @ the same rows shifted by one pixel
        vtrn.16         q2,  q3                 @ d4 = top rows, d5 = bottom rows, pixel pairs interleaved
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d5,  d1
  .ifc \type,avg
        vld1.16         {d18[0]}, [r0,:16], r2
        vld1.16         {d18[1]}, [r0,:16]
        sub             r0,  r0,  r2            @ undo the single post-increment before storing
  .endif
        vtrn.32         d16, d17
        vadd.i16        d16, d16, d17           @ add the left-tap and right-tap partial sums
        vrshrn.u16      d16, q8,  #6            @ only the low 4 result bytes are stored
  .ifc \type,avg
        vrhadd.u8       d16, d16, d18
  .endif
        vst1.16         {d16[0]}, [r0,:16], r2
        vst1.16         {d16[1]}, [r0,:16], r2
        subs            r3,  r3,  #2
        bgt             1b
        pop             {r4-r6, pc}
2:
        /* No interpolation needed.  ldrh_post and strh_post are not defined
         * in this file; presumably a halfword load/store that post-increments
         * the pointer by the last operand - confirm in asm.S. */
  .ifc \type,put
        ldrh_post       r5,  r1,  r2
        strh_post       r5,  r0,  r2
        ldrh_post       r6,  r1,  r2
        strh_post       r6,  r0,  r2
  .else
        vld1.16         {d16[0]}, [r1], r2
        vld1.16         {d16[1]}, [r1], r2
        vld1.16         {d18[0]}, [r0,:16], r2
        vld1.16         {d18[1]}, [r0,:16]
        sub             r0,  r0,  r2            @ undo the single post-increment before storing
        vrhadd.u8       d16, d16, d18
        vst1.16         {d16[0]}, [r0,:16], r2
        vst1.16         {d16[1]}, [r0,:16], r2
  .endif
        subs            r3,  r3,  #2
        bgt             2b
        pop             {r4-r6, pc}
endfunc
.endm

        /* H.264 kernels: always built. */
        h264_chroma_mc8 put
        h264_chroma_mc8 avg
        h264_chroma_mc4 put
        h264_chroma_mc4 avg
        h264_chroma_mc2 put
        h264_chroma_mc2 avg

#if CONFIG_RV40_DECODER
/*
 * Rounding bias for the rv40 kernels: a 4x4 table of 16-bit values that the
 * kernels index as [y >> 1][x >> 1].
 */
const   rv40bias
        .short           0, 16, 32, 16
        .short          32, 28, 32, 28
        .short           0, 32, 16, 32
        .short          32, 28, 32, 28
endconst

        h264_chroma_mc8 put, rv40
        h264_chroma_mc8 avg, rv40
        h264_chroma_mc4 put, rv40
        h264_chroma_mc4 avg, rv40
#endif

#if CONFIG_VC1DSP
        /* VC-1 kernels: constant bias of 28, see the vc1 branches above. */
        h264_chroma_mc8 put, vc1
        h264_chroma_mc8 avg, vc1
        h264_chroma_mc4 put, vc1
        h264_chroma_mc4 avg, vc1
#endif