/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config_components.h"

#include "libavutil/aarch64/asm.S"

/*
 * Bilinear chroma motion compensation, AArch64 NEON.
 *
 * Register roles shared by all three macros below (taken from the C
 * signature in the per-macro comment):
 *   x0 = dst, x1 = src, x2 = stride, w3 = h, w4 = x, w5 = y
 *   x8 = second dst pointer, read side of the avg variants (mc8/mc4 only)
 *
 * The four filter weights are derived from x and y as
 *   w7  = D = x * y
 *   w12 = B = 8 * x - x * y
 *   w6  = C = 8 * y - x * y
 *   w4  = A = x * y - 8 * x - 8 * y + 64
 * so that A + B + C + D == 64, which is why results are shifted right
 * by 6.  A weights the current pixel, B its right neighbour, C the
 * pixel below and D the pixel below-right.
 *
 * Every loop produces two output rows per iteration and repeats while
 * the decremented row counter is still greater than zero.
 * NOTE(review): this presumably relies on h being a positive even
 * number - confirm against the callers.
 *
 * Only x0-x14, v0-v7 and v16-v31 are written; none of these are
 * callee-saved under AAPCS64, so no register is saved or restored.
 */

/* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
/*
 * Emits ff_<type>_<codec>_chroma_mc8_neon (8 pixels per row).
 *   type  = put : store the filtered result
 *           avg : store the rounding average (urhadd) of the result and
 *                 the bytes already at dst
 *   codec = h264: rounding narrowing shift (rshrn #6)
 *           rv40: add a bias read from rv40bias, then truncating shift
 *           vc1 : add a constant bias of 28, then truncating shift
 * Path selection:
 *   x*y != 0         -> loop 1 (full 2-D filter)
 *   x == 0 && y == 0 -> loop 5 (single weight A == 64)
 *   y == 0           -> loop 4 (horizontal only)
 *   otherwise        -> loop 3 (vertical only)
 */
.macro  h264_chroma_mc8 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
  .ifc \type,avg
        mov             x8,  x0                 // x8 walks dst for the reads, x0 for the writes
  .endif
        prfm            pldl1strm, [x1]
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,rv40
        movrel          x6,  rv40bias
        lsr             w9,  w5,  #1            // y >> 1
        lsr             w10, w4,  #1            // x >> 1
        lsl             w9,  w9,  #3            // row offset: 4 shorts = 8 bytes per row
        lsl             w10, w10, #1            // column offset: 2 bytes per short
        add             w9,  w9,  w10
        add             x6,  x6,  w9, UXTW
        ld1r            {v22.8H}, [x6]          // v22 = bias replicated to all lanes
  .endif
  .ifc \codec,vc1
        movi            v22.8H,   #28           // v22 = constant bias
  .endif
        mul             w7,  w4,  w5            // D = x * y
        lsl             w14, w5,  #3            // 8 * y
        lsl             w13, w4,  #3            // 8 * x
        cmp             w7,  #0                 // flags consumed by b.eq below; nothing in between sets flags
        sub             w6,  w14, w7            // C = 8 * y - x * y
        sub             w12, w13, w7            // B = 8 * x - x * y
        sub             w4,  w7,  w13
        sub             w4,  w4,  w14
        add             w4,  w4,  #64           // A = x * y - 8 * x - 8 * y + 64
        b.eq            2f                      // x * y == 0: use a reduced filter

        // Full 2-D filter.  v0..v3 = A, B, C, D replicated per byte.
        dup             v0.8B,  w4
        dup             v1.8B,  w12
        ld1             {v4.8B, v5.8B}, [x1], x2        // 16 bytes are loaded, bytes 0..8 are used
        dup             v2.8B,  w6
        dup             v3.8B,  w7
        ext             v5.8B,  v4.8B,  v5.8B,  #1      // v5 = row shifted left by one pixel
1:      ld1             {v6.8B, v7.8B}, [x1], x2
        umull           v16.8H, v4.8B,  v0.8B
        umlal           v16.8H, v5.8B,  v1.8B
        ext             v7.8B,  v6.8B,  v7.8B,  #1
        ld1             {v4.8B, v5.8B}, [x1], x2        // this row is reused as the top row of the next iteration
        umlal           v16.8H, v6.8B,  v2.8B
        prfm            pldl1strm, [x1]
        ext             v5.8B,  v4.8B,  v5.8B,  #1
        umlal           v16.8H, v7.8B,  v3.8B
        umull           v17.8H, v6.8B,  v0.8B
        subs            w3,  w3,  #2                    // flags consumed by b.gt at the end of the loop
        umlal           v17.8H, v7.8B,  v1.8B
        umlal           v17.8H, v4.8B,  v2.8B
        umlal           v17.8H, v5.8B,  v3.8B
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            1b
        ret

        // x * y == 0, so B = 8 * x and C = 8 * y and at most one is non-zero.
2:      adds            w12, w12, w6            // w12 = B + C, the single secondary weight
        dup             v0.8B,  w4
        b.eq            5f                      // B + C == 0: x == 0 and y == 0
        tst             w6,  w6
        dup             v1.8B,  w12
        b.eq            4f                      // C == 0: horizontal filter only

        // Vertical only: out = A * row[n] + C * row[n + 1]
        ld1             {v4.8B}, [x1], x2
3:      ld1             {v6.8B}, [x1], x2
        umull           v16.8H, v4.8B,  v0.8B
        umlal           v16.8H, v6.8B,  v1.8B
        ld1             {v4.8B}, [x1], x2
        umull           v17.8H, v6.8B,  v0.8B
        umlal           v17.8H, v4.8B,  v1.8B
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
        prfm            pldl1strm, [x1, x2]
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        subs            w3,  w3,  #2
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            3b
        ret

        // Horizontal only: out = A * row[i] + B * row[i + 1]
4:      ld1             {v4.8B, v5.8B}, [x1], x2
        ld1             {v6.8B, v7.8B}, [x1], x2
        ext             v5.8B,  v4.8B,  v5.8B,  #1
        ext             v7.8B,  v6.8B,  v7.8B,  #1
        prfm            pldl1strm, [x1]
        subs            w3,  w3,  #2
        umull           v16.8H, v4.8B,  v0.8B
        umlal           v16.8H, v5.8B,  v1.8B
        umull           v17.8H, v6.8B,  v0.8B
        umlal           v17.8H, v7.8B,  v1.8B
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            4b
        ret

        // x == 0 and y == 0: A == 64, each pixel is scaled by 64 and shifted back
5:      ld1             {v4.8B}, [x1], x2
        ld1             {v5.8B}, [x1], x2
        prfm            pldl1strm, [x1]
        subs            w3,  w3,  #2
        umull           v16.8H, v4.8B,  v0.8B
        umull           v17.8H, v5.8B,  v0.8B
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            5b
        ret
endfunc
.endm

/* chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
/*
 * Emits ff_<type>_<codec>_chroma_mc4_neon (4 pixels per row).
 * Same type/codec variants, weight derivation and path selection as
 * the 8-pixel macro.  To keep all eight byte lanes busy, each 64-bit
 * vector carries two 4-pixel groups:
 *   pixels  = { row[0..3], row[1..4] }   (built with ext + trn1 .2S)
 *   weights = { A x 4, B x 4 } and { C x 4, D x 4 }
 * The two halves of each widened product are then added together
 * (trn1/trn2 .2D followed by add), which leaves two finished output
 * rows in one 128-bit register.
 *
 * NOTE(review): the "ext vN, vM, vN, #1" instructions read vN before
 * anything has been loaded into it.  Only the low 32-bit lane of the
 * ext result is consumed by the following trn1 .2S, and that lane is
 * made of bytes 1..4 of vM, so the stale byte does not reach the
 * output.
 */
.macro  h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
  .ifc \type,avg
        mov             x8,  x0                 // x8 walks dst for the reads, x0 for the writes
  .endif
        prfm            pldl1strm, [x1]
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,rv40
        movrel          x6,  rv40bias
        lsr             w9,  w5,  #1            // y >> 1
        lsr             w10, w4,  #1            // x >> 1
        lsl             w9,  w9,  #3            // row offset: 4 shorts = 8 bytes per row
        lsl             w10, w10, #1            // column offset: 2 bytes per short
        add             w9,  w9,  w10
        add             x6,  x6,  w9, UXTW
        ld1r            {v22.8H}, [x6]          // v22 = bias replicated to all lanes
  .endif
  .ifc \codec,vc1
        movi            v22.8H,   #28           // v22 = constant bias
  .endif
        mul             w7,  w4,  w5            // D = x * y
        lsl             w14, w5,  #3            // 8 * y
        lsl             w13, w4,  #3            // 8 * x
        cmp             w7,  #0                 // flags consumed by b.eq below; nothing in between sets flags
        sub             w6,  w14, w7            // C = 8 * y - x * y
        sub             w12, w13, w7            // B = 8 * x - x * y
        sub             w4,  w7,  w13
        sub             w4,  w4,  w14
        add             w4,  w4,  #64           // A = x * y - 8 * x - 8 * y + 64
        b.eq            2f                      // x * y == 0: use a reduced filter

        // Full 2-D filter.
        dup             v24.8B, w4
        dup             v25.8B, w12
        ld1             {v4.8B}, [x1], x2
        dup             v26.8B, w6
        dup             v27.8B, w7
        ext             v5.8B,  v4.8B,  v5.8B,  #1
        trn1            v0.2S,  v24.2S, v25.2S          // v0 = { A x 4, B x 4 }
        trn1            v2.2S,  v26.2S, v27.2S          // v2 = { C x 4, D x 4 }
        trn1            v4.2S,  v4.2S,  v5.2S           // v4 = { row[0..3], row[1..4] }
1:      ld1             {v6.8B}, [x1], x2
        ext             v7.8B,  v6.8B,  v7.8B,  #1
        trn1            v6.2S,  v6.2S,  v7.2S
        umull           v18.8H, v4.8B,  v0.8B
        umlal           v18.8H, v6.8B,  v2.8B
        ld1             {v4.8B}, [x1], x2               // this row is reused as the top row of the next iteration
        ext             v5.8B,  v4.8B,  v5.8B,  #1
        trn1            v4.2S,  v4.2S,  v5.2S
        prfm            pldl1strm, [x1]
        umull           v19.8H, v6.8B,  v0.8B
        umlal           v19.8H, v4.8B,  v2.8B
        trn1            v30.2D, v18.2D, v19.2D          // low halves of both rows
        trn2            v31.2D, v18.2D, v19.2D          // high halves of both rows
        add             v18.8H, v30.8H, v31.8H          // v18 = { out row 0, out row 1 }
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
        subs            w3,  w3,  #2
        prfm            pldl1strm, [x1, x2]
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            1b
        ret

        // x * y == 0, so B = 8 * x and C = 8 * y and at most one is non-zero.
2:      adds            w12, w12, w6            // w12 = B + C, the single secondary weight
        dup             v30.8B, w4
        b.eq            5f                      // B + C == 0: x == 0 and y == 0
        tst             w6,  w6
        dup             v31.8B, w12
        trn1            v0.2S,  v30.2S, v31.2S          // v0 = { A x 4, w12 x 4 }
        trn2            v1.2S,  v30.2S, v31.2S          // same contents as v0
        b.eq            4f                      // C == 0: horizontal filter only

        // Vertical only.  v4 holds two consecutive rows, one per 32-bit
        // lane; the lanes are refilled alternately, so the weights are
        // needed in both orders.
        ext             v1.8B,  v0.8B,  v1.8B,  #4      // v1 = { w12 x 4, A x 4 }
        ld1             {v4.S}[0], [x1], x2
3:      ld1             {v4.S}[1], [x1], x2             // v4 = { row n, row n + 1 }
        umull           v18.8H, v4.8B,  v0.8B
        ld1             {v4.S}[0], [x1], x2             // v4 = { row n + 2, row n + 1 }
        umull           v19.8H, v4.8B,  v1.8B
        trn1            v30.2D, v18.2D, v19.2D
        trn2            v31.2D, v18.2D, v19.2D
        add             v18.8H, v30.8H, v31.8H
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        subs            w3,  w3,  #2
        prfm            pldl1strm, [x1, x2]
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            3b
        ret

        // Horizontal only: out = A * row[i] + B * row[i + 1]
4:      ld1             {v4.8B}, [x1], x2
        ld1             {v6.8B}, [x1], x2
        ext             v5.8B,  v4.8B,  v5.8B,  #1
        ext             v7.8B,  v6.8B,  v7.8B,  #1
        trn1            v4.2S,  v4.2S,  v5.2S
        trn1            v6.2S,  v6.2S,  v7.2S
        umull           v18.8H, v4.8B,  v0.8B
        umull           v19.8H, v6.8B,  v0.8B
        subs            w3,  w3,  #2
        trn1            v30.2D, v18.2D, v19.2D
        trn2            v31.2D, v18.2D, v19.2D
        add             v18.8H, v30.8H, v31.8H
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        // NOTE(review): same address as the prefetch above; the other
        // loops prefetch [x1, x2] second - confirm whether that was intended.
        prfm            pldl1strm, [x1]
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            4b
        ret

        // x == 0 and y == 0: A == 64, each pixel is scaled by 64 and shifted back
5:      ld1             {v4.S}[0], [x1], x2
        ld1             {v4.S}[1], [x1], x2
        umull           v18.8H, v4.8B,  v30.8B
        subs            w3,  w3,  #2
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        // NOTE(review): same address as the prefetch above; the other
        // loops prefetch [x1, x2] second - confirm whether that was intended.
        prfm            pldl1strm, [x1]
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            5b
        ret
endfunc
.endm

/*
 * Emits ff_<type>_h264_chroma_mc2_neon (2 pixels per row, H.264
 * rounding only).  Uses the same x0..w5 argument registers as the
 * macros above.
 *   x == 0 && y == 0 -> loop 2: plain copy (put) or rounding average
 *                       with dst (avg), no filtering
 *   otherwise        -> loop 1: full 2-D filter
 * The avg variants read dst through x0 itself and step x0 back by one
 * stride afterwards, so the stores land on the rows that were read.
 */
.macro  h264_chroma_mc2 type
function ff_\type\()_h264_chroma_mc2_neon, export=1
        prfm            pldl1strm, [x1]
        prfm            pldl1strm, [x1, x2]
        orr             w7,  w4,  w5
        cbz             w7,  2f                 // x == 0 and y == 0

        mul             w7,  w4,  w5            // D = x * y
        lsl             w14, w5,  #3            // 8 * y
        lsl             w13, w4,  #3            // 8 * x
        sub             w6,  w14, w7            // C = 8 * y - x * y
        sub             w12, w13, w7            // B = 8 * x - x * y
        sub             w4,  w7,  w13
        sub             w4,  w4,  w14
        add             w4,  w4,  #64           // A = x * y - 8 * x - 8 * y + 64
        dup             v0.8B,  w4
        dup             v2.8B,  w12
        dup             v1.8B,  w6
        dup             v3.8B,  w7
        trn1            v0.4H,  v0.4H,  v2.4H           // v0 = { A, A, B, B, A, A, B, B }
        trn1            v1.4H,  v1.4H,  v3.4H           // v1 = { C, C, D, D, C, C, D, D }
1:
        ld1             {v4.S}[0], [x1], x2
        ld1             {v4.S}[1], [x1], x2             // v4 = { row n, row n + 1 }
        rev64           v5.2S,  v4.2S
        ld1             {v5.S}[1], [x1]                 // v5 = { row n + 1, row n + 2 }; x1 is not advanced
        ext             v6.8B,  v4.8B,  v5.8B,  #1
        ext             v7.8B,  v5.8B,  v4.8B,  #1
        trn1            v4.4H,  v4.4H,  v6.4H           // per row: pixels { 0, 1, 1, 2 }
        trn1            v5.4H,  v5.4H,  v7.4H
        umull           v16.8H, v4.8B,  v0.8B
        umlal           v16.8H, v5.8B,  v1.8B
  .ifc \type,avg
        ld1             {v18.H}[0], [x0], x2
        ld1             {v18.H}[2], [x0]
        sub             x0,  x0,  x2                    // back to the first of the two dst rows
  .endif
        rev64           v17.4S, v16.4S
        add             v16.8H, v16.8H, v17.8H          // add the A/C partial sums to the B/D partial sums
        rshrn           v16.8B, v16.8H, #6
  .ifc \type,avg
        urhadd          v16.8B, v16.8B, v18.8B
  .endif
        st1             {v16.H}[0], [x0], x2            // row n result lives in halfword 0
        st1             {v16.H}[2], [x0], x2            // row n + 1 result lives in halfword 2
        subs            w3,  w3,  #2
        b.gt            1b
        ret

2:
        ld1             {v16.H}[0], [x1], x2
        ld1             {v16.H}[1], [x1], x2
  .ifc \type,avg
        ld1             {v18.H}[0], [x0], x2
        ld1             {v18.H}[1], [x0]
        sub             x0,  x0,  x2                    // back to the first of the two dst rows
        urhadd          v16.8B, v16.8B, v18.8B
  .endif
        st1             {v16.H}[0], [x0], x2
        st1             {v16.H}[1], [x0], x2
        subs            w3,  w3,  #2
        b.gt            2b
        ret
endfunc
.endm

        h264_chroma_mc8 put
        h264_chroma_mc8 avg
        h264_chroma_mc4 put
        h264_chroma_mc4 avg
        h264_chroma_mc2 put
        h264_chroma_mc2 avg

#if CONFIG_RV40_DECODER
/*
 * Rounding bias for the rv40 variants: 4 rows of 4 16-bit entries.
 * The macros above read the entry at row (y >> 1), column (x >> 1).
 */
const   rv40bias
        .short           0, 16, 32, 16
        .short          32, 28, 32, 28
        .short           0, 32, 16, 32
        .short          32, 28, 32, 28
endconst

        h264_chroma_mc8 put, rv40
        h264_chroma_mc8 avg, rv40
        h264_chroma_mc4 put, rv40
        h264_chroma_mc4 avg, rv40
#endif

#if CONFIG_VC1DSP
        h264_chroma_mc8 put, vc1
        h264_chroma_mc8 avg, vc1
        h264_chroma_mc4 put, vc1
        h264_chroma_mc4 avg, vc1
#endif