/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/aarch64/asm.S"
#include "neon.S"

/*
 * ff_h264_idct_add_neon
 *
 * 4x4 inverse transform of a coefficient block, added to a 4x4 block of
 * 8-bit pixels with unsigned saturation.
 *
 * In:    x0 = destination pixels (4 rows of 4 bytes)
 *        x1 = coefficients (16 x int16, 32 bytes)
 *        w2 = destination stride in bytes (signed, sign-extended here)
 * Out:   the 32 coefficient bytes at x1 are overwritten with zero
 *        x1 is restored to its entry value
 *        x0 = entry x0 + 4 * stride
 * Clobb: x2 (sign-extended), v0-v7, v16-v19, v30
 *
 * The .L_ local label is the address the dispatch loops below load with
 * movrel.  AARCH64_VALID_CALL_TARGET comes from asm.S (presumably a BTI
 * landing pad, needed because the dispatchers reach this code with blr).
 */
function ff_h264_idct_add_neon, export=1
.L_ff_h264_idct_add_neon:
        AARCH64_VALID_CALL_TARGET
        ld1             {v0.4H, v1.4H, v2.4H, v3.4H},  [x1]
        sxtw            x2,  w2
        movi            v30.8H, #0

        // first 1-D pass, interleaved with clearing the coefficient block
        add             v4.4H,  v0.4H,  v2.4H
        sshr            v16.4H, v1.4H,  #1
        st1             {v30.8H},    [x1], #16
        sshr            v17.4H, v3.4H,  #1
        st1             {v30.8H},    [x1], #16
        sub             v5.4H,  v0.4H,  v2.4H
        sub             v6.4H,  v16.4H, v3.4H
        add             v7.4H,  v1.4H,  v17.4H
        add             v0.4H,  v4.4H,  v7.4H
        add             v1.4H,  v5.4H,  v6.4H
        sub             v2.4H,  v5.4H,  v6.4H
        sub             v3.4H,  v4.4H,  v7.4H

        transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7

        // second 1-D pass, interleaved with loading the four pixel rows.
        // Rows 0/1 go to v18.S[0]/[1]; rows 2/3 go to v19.S[1]/[0] (swapped)
        // so they line up with the halves of v1 = v4 - v6 computed below.
        add             v4.4H,  v0.4H,  v2.4H
        ld1             {v18.S}[0], [x0], x2
        sshr            v16.4H,  v3.4H,  #1
        sshr            v17.4H,  v1.4H,  #1
        ld1             {v18.S}[1], [x0], x2
        sub             v5.4H,  v0.4H,  v2.4H
        ld1             {v19.S}[1], [x0], x2
        add             v6.4H,  v16.4H, v1.4H
        ins             v4.D[1],  v5.D[0]
        sub             v7.4H,  v17.4H, v3.4H
        ld1             {v19.S}[0], [x0], x2
        ins             v6.D[1],  v7.D[0]
        sub             x0,  x0,  x2, lsl #2    // rewind x0 to row 0
        add             v0.8H,  v4.8H,  v6.8H
        sub             v1.8H,  v4.8H,  v6.8H

        // rounding shift: (x + 32) >> 6
        srshr           v0.8H,  v0.8H,  #6
        srshr           v1.8H,  v1.8H,  #6

        // add the residual to the widened pixels
        uaddw           v0.8H,  v0.8H,  v18.8B
        uaddw           v1.8H,  v1.8H,  v19.8B

        // saturate back to unsigned 8 bit
        sqxtun          v0.8B, v0.8H
        sqxtun          v1.8B, v1.8H

        // same lane order as the loads above
        st1             {v0.S}[0],  [x0], x2
        st1             {v0.S}[1],  [x0], x2
        st1             {v1.S}[1],  [x0], x2
        st1             {v1.S}[0],  [x0], x2

        sub             x1,  x1,  #32           // undo the two post-incremented stores
        ret
endfunc

/*
 * ff_h264_idct_dc_add_neon
 *
 * DC-only variant of the 4x4 case: (block[0] + 32) >> 6 is added to every
 * pixel of a 4x4 block with unsigned saturation.
 *
 * In:    x0 = destination pixels (4 rows of 4 bytes)
 *        x1 = coefficients; only the first int16 is read
 *        w2 = destination stride in bytes (signed, sign-extended here)
 * Out:   the int16 at [x1] is overwritten with zero; x1 is unchanged
 *        x0 = entry x0 + 4 * stride
 * Clobb: x2 (sign-extended), w3, v0-v4
 */
function ff_h264_idct_dc_add_neon, export=1
.L_ff_h264_idct_dc_add_neon:
        AARCH64_VALID_CALL_TARGET
        sxtw            x2,  w2
        mov             w3,  #0
        ld1r            {v2.8H},  [x1]          // broadcast block[0]
        strh            w3,       [x1]          // block[0] = 0
        srshr           v2.8H,  v2.8H,  #6
        ld1             {v0.S}[0], [x0], x2
        ld1             {v0.S}[1], [x0], x2
        uaddw           v3.8H,  v2.8H,  v0.8B
        ld1             {v1.S}[0], [x0], x2
        ld1             {v1.S}[1], [x0], x2
        uaddw           v4.8H,  v2.8H,  v1.8B
        sqxtun          v0.8B,  v3.8H
        sqxtun          v1.8B,  v4.8H
        sub             x0,  x0,  x2, lsl #2    // rewind x0 to row 0
        st1             {v0.S}[0], [x0], x2
        st1             {v0.S}[1], [x0], x2
        st1             {v1.S}[0], [x0], x2
        st1             {v1.S}[1], [x0], x2
        ret
endfunc

/*
 * ff_h264_idct_add16_neon
 *
 * Walks 16 consecutive 4x4 coefficient blocks (32 bytes each).  For block i,
 * with nnz = x4[scan8[i]]:
 *   nnz == 0                  -> block skipped
 *   nnz == 1 && block[0] != 0 -> ff_h264_idct_dc_add_neon
 *   otherwise                 -> ff_h264_idct_add_neon
 *
 * In:    x0 = dest base
 *        x1 = block_offset (int32 per block, added to dest)
 *        x2 = block (coefficients)
 *        w3 = stride
 *        x4 = byte array indexed through scan8 (called nnzc in
 *             ff_h264_idct_add8_neon below)
 * Clobb: x0-x3, x5-x7, x9, x10, x12-x15, flags, plus whatever the two
 *        callees clobber
 *
 * The return address is parked in x12 because blr overwrites x30; neither
 * callee touches x12.  Both callees leave x1 at its entry value, which is
 * what makes the plain "add x1, x1, #32" step correct.
 */
function ff_h264_idct_add16_neon, export=1
        mov             x12, x30
        mov             x6,  x0         // dest
        mov             x5,  x1         // block_offset
        mov             x1,  x2         // block
        mov             w9,  w3         // stride
        movrel          x7,  scan8
        mov             x10, #16
        movrel          x13, .L_ff_h264_idct_dc_add_neon
        movrel          x14, .L_ff_h264_idct_add_neon
1:      mov             w2,  w9
        ldrb            w3,  [x7], #1
        ldrsw           x0,  [x5], #4
        ldrb            w3,  [x4,  w3,  uxtw]
        subs            w3,  w3,  #1            // Z set <=> nnz == 1
        b.lt            2f                      // nnz == 0: nothing to do
        ldrsh           w3,  [x1]               // block[0]
        add             x0,  x0,  x6
        // nnz == 1: flags = cmp(block[0], 0); otherwise force Z=1 (nzcv #4)
        ccmp            w3,  #0,  #4,  eq
        csel            x15, x13, x14, ne       // ne <=> nnz == 1 && block[0] != 0
        blr             x15
2:      subs            x10, x10, #1
        add             x1,  x1,  #32
        b.ne            1b
        ret             x12
endfunc

/*
 * ff_h264_idct_add16intra_neon
 *
 * Same register interface and loop as ff_h264_idct_add16_neon, with a
 * different per-block decision.  With nnz = x4[scan8[i]]:
 *   nnz != 0                  -> ff_h264_idct_add_neon
 *   nnz == 0 && block[0] != 0 -> ff_h264_idct_dc_add_neon
 *   nnz == 0 && block[0] == 0 -> block skipped
 *
 * Clobb: x0-x3, x5-x7, x9, x10, x12-x15, flags, plus whatever the two
 *        callees clobber
 */
function ff_h264_idct_add16intra_neon, export=1
        mov             x12, x30
        mov             x6,  x0         // dest
        mov             x5,  x1         // block_offset
        mov             x1,  x2         // block
        mov             w9,  w3         // stride
        movrel          x7,  scan8
        mov             x10, #16
        movrel          x13, .L_ff_h264_idct_dc_add_neon
        movrel          x14, .L_ff_h264_idct_add_neon
1:      mov             w2,  w9
        ldrb            w3,  [x7], #1
        ldrsw           x0,  [x5], #4
        ldrb            w3,  [x4,  w3,  uxtw]
        add             x0,  x0,  x6
        cmp             w3,  #0                 // Z set <=> nnz == 0
        ldrsh           w3,  [x1]               // block[0]; loads leave flags alone
        csel            x15, x13, x14, eq       // nnz == 0 ? dc_add : idct_add
        // nnz == 0: flags = cmp(block[0], 0); otherwise force Z=0 (nzcv #0)
        ccmp            w3,  #0,  #0,  eq
        b.eq            2f                      // nnz == 0 && block[0] == 0
        blr             x15
2:      subs            x10, x10, #1
        add             x1,  x1,  #32
        b.ne            1b
        ret             x12
endfunc

/*
 * ff_h264_idct_add8_neon
 *
 * Processes two groups of four 4x4 blocks, one group per destination
 * pointer.  The loop index x10 takes the values 0..3 (writing through
 * dest[0]) and then 16..19 (writing through dest[1]).  All three tables are
 * pre-biased by 16 entries, so the entries actually used are 16..19 and
 * 32..35 of scan8, block_offset and block.
 * The per-block decision is the same as in ff_h264_idct_add16intra_neon.
 *
 * In:    x0 = pointer to two destination pointers (dest[0], dest[1])
 *        x1 = block_offset (int32 entries)
 *        x2 = block (coefficients, 32 bytes per 4x4 block)
 *        w3 = stride
 *        x4 = nnzc byte array, indexed through scan8
 * Clobb: x0-x3, x5-x7, x9-x15, flags, plus whatever the two callees clobber
 *
 * x19/x20 are used as loop state and are saved and restored here.  A 0x40
 * byte frame is reserved although only the first 16 bytes are written; sp
 * stays 16-byte aligned.
 */
function ff_h264_idct_add8_neon, export=1
        sub             sp,  sp, #0x40
        stp             x19, x20, [sp]
        mov             x12, x30
        ldp             x6,  x15, [x0]          // dest[0], dest[1]
        add             x5,  x1,  #16*4         // block_offset
        add             x9,  x2,  #16*32        // block
        mov             w19, w3                 // stride
        movrel          x13, .L_ff_h264_idct_dc_add_neon
        movrel          x14, .L_ff_h264_idct_add_neon
        movrel          x7,  scan8, 16
        mov             x10, #0
        mov             x11, #16
1:      mov             w2,  w19
        ldrb            w3,  [x7, x10]          // scan8[i]
        ldrsw           x0,  [x5, x10, lsl #2]  // block_offset[i]
        ldrb            w3,  [x4, w3,  uxtw]    // nnzc[ scan8[i] ]
        add             x0,  x0,  x6            // block_offset[i] + dst[j-1]
        add             x1,  x9,  x10, lsl #5   // block + i * 16
        cmp             w3,  #0
        ldrsh           w3,  [x1]               // block[i*16]
        csel            x20, x13, x14, eq
        ccmp            w3,  #0,  #0,  eq
        b.eq            2f
        blr             x20
2:      add             x10, x10, #1
        cmp             x10, #4
        csel            x10, x11, x10, eq       // mov x10, #16
        csel            x6,  x15, x6,  eq       // switch to dest[1] (csel keeps the flags)
        cmp             x10, #20
        b.lt            1b
        ldp             x19, x20, [sp]
        add             sp,  sp,  #0x40
        ret             x12
endfunc

/*
 * idct8x8_cols pass
 *
 * One 1-D pass of the 8x8 inverse transform over eight rows of eight int16.
 *
 * pass == 0:
 *   In:  rows 0-5 in v24-v29; rows 6-7 are loaded here from [x1] into
 *        v30/v31.  v19 must be zero on entry: it is stored twice to [x1]
 *        to clear those 32 bytes, and x1 advances by 32.
 *   Out: rows in v24, v25, v26, v27, v28, v29, v18, v31 (row 6 is in v18).
 * pass != 0:
 *   In:  rows in v24, v25, v26, v27, v28, v29, v18, v31.
 *   Out: rows in v24-v31.
 *
 * Clobb: v16, v17, v19, and whichever of v18/v30 does not hold row 6.
 * va/vb are register aliases that swap v18/v30 between the two passes; they
 * are released again with .unreq at the end of the macro.
 */
.macro  idct8x8_cols    pass
  .if \pass == 0
        va      .req    v18
        vb      .req    v30
        sshr            v18.8H, v26.8H, #1
        add             v16.8H, v24.8H, v28.8H
        ld1             {v30.8H, v31.8H}, [x1]
        st1             {v19.8H}, [x1], #16
        st1             {v19.8H}, [x1], #16
        sub             v17.8H,  v24.8H, v28.8H
        sshr            v19.8H,  v30.8H, #1
        sub             v18.8H,  v18.8H,  v30.8H
        add             v19.8H,  v19.8H,  v26.8H
  .else
        va      .req    v30
        vb      .req    v18
        sshr            v30.8H, v26.8H, #1
        sshr            v19.8H, v18.8H, #1
        add             v16.8H, v24.8H, v28.8H
        sub             v17.8H, v24.8H, v28.8H
        sub             v30.8H, v30.8H, v18.8H
        add             v19.8H, v19.8H, v26.8H
  .endif
        add             v26.8H, v17.8H, va.8H
        sub             v28.8H, v17.8H, va.8H
        add             v24.8H, v16.8H, v19.8H
        sub             vb.8H,  v16.8H, v19.8H
        sub             v16.8H, v29.8H, v27.8H
        add             v17.8H, v31.8H, v25.8H
        sub             va.8H,  v31.8H, v25.8H
        add             v19.8H, v29.8H, v27.8H
        sub             v16.8H, v16.8H, v31.8H
        sub             v17.8H, v17.8H, v27.8H
        add             va.8H,  va.8H,  v29.8H
        add             v19.8H, v19.8H, v25.8H
        sshr            v25.8H, v25.8H, #1
        sshr            v27.8H, v27.8H, #1
        sshr            v29.8H, v29.8H, #1
        sshr            v31.8H, v31.8H, #1
        sub             v16.8H, v16.8H, v31.8H
        sub             v17.8H, v17.8H, v27.8H
        add             va.8H,  va.8H,  v29.8H
        add             v19.8H, v19.8H, v25.8H
        sshr            v25.8H, v16.8H, #2
        sshr            v27.8H, v17.8H, #2
        sshr            v29.8H, va.8H,  #2
        sshr            v31.8H, v19.8H, #2
        sub             v19.8H, v19.8H, v25.8H
        sub             va.8H,  v27.8H, va.8H
        add             v17.8H, v17.8H, v29.8H
        add             v16.8H, v16.8H, v31.8H
  .if \pass == 0
        sub             v31.8H, v24.8H, v19.8H
        add             v24.8H, v24.8H, v19.8H
        add             v25.8H, v26.8H, v18.8H
        sub             v18.8H, v26.8H, v18.8H
        add             v26.8H, v28.8H, v17.8H
        add             v27.8H, v30.8H, v16.8H
        sub             v29.8H, v28.8H, v17.8H
        sub             v28.8H, v30.8H, v16.8H
  .else
        sub             v31.8H, v24.8H, v19.8H
        add             v24.8H, v24.8H, v19.8H
        add             v25.8H, v26.8H, v30.8H
        sub             v30.8H, v26.8H, v30.8H
        add             v26.8H, v28.8H, v17.8H
        sub             v29.8H, v28.8H, v17.8H
        add             v27.8H, v18.8H, v16.8H
        sub             v28.8H, v18.8H, v16.8H
  .endif
        .unreq          va
        .unreq          vb
.endm

/*
 * ff_h264_idct8_add_neon
 *
 * 8x8 inverse transform of a coefficient block, added to an 8x8 block of
 * 8-bit pixels with unsigned saturation.
 *
 * In:    x0 = destination pixels (8 rows of 8 bytes)
 *        x1 = coefficients (64 x int16, 128 bytes)
 *        w2 = destination stride in bytes (signed, sign-extended here)
 * Out:   the 128 coefficient bytes at x1 are overwritten with zero
 *        x1 is restored to its entry value
 *        x0 = entry x0 + 8 * stride
 * Clobb: x2 (sign-extended), x3, v0-v7, v16-v19, v24-v31
 *
 * Only rows 0-5 are loaded and cleared here; rows 6-7 are loaded and
 * cleared inside "idct8x8_cols 0", which also relies on v19 still being
 * zero at that point.
 */
function ff_h264_idct8_add_neon, export=1
.L_ff_h264_idct8_add_neon:
        AARCH64_VALID_CALL_TARGET
        movi            v19.8H,   #0
        sxtw            x2,       w2
        ld1             {v24.8H, v25.8H}, [x1]
        st1             {v19.8H},  [x1], #16
        st1             {v19.8H},  [x1], #16
        ld1             {v26.8H, v27.8H}, [x1]
        st1             {v19.8H},  [x1], #16
        st1             {v19.8H},  [x1], #16
        ld1             {v28.8H, v29.8H}, [x1]
        st1             {v19.8H},  [x1], #16
        st1             {v19.8H},  [x1], #16

        idct8x8_cols    0
        // row 6 comes out of pass 0 in v18, hence v18 in the seventh slot
        transpose_8x8H  v24, v25, v26, v27, v28, v29, v18, v31, v6, v7
        idct8x8_cols    1

        mov             x3,  x0                 // x3 = store pointer, x0 = load pointer
        srshr           v24.8H, v24.8H, #6
        ld1             {v0.8B},     [x0], x2
        srshr           v25.8H, v25.8H, #6
        ld1             {v1.8B},     [x0], x2
        srshr           v26.8H, v26.8H, #6
        ld1             {v2.8B},     [x0], x2
        srshr           v27.8H, v27.8H, #6
        ld1             {v3.8B},     [x0], x2
        srshr           v28.8H, v28.8H, #6
        ld1             {v4.8B},     [x0], x2
        srshr           v29.8H, v29.8H, #6
        ld1             {v5.8B},     [x0], x2
        srshr           v30.8H, v30.8H, #6
        ld1             {v6.8B},     [x0], x2
        srshr           v31.8H, v31.8H, #6
        ld1             {v7.8B},     [x0], x2
        uaddw           v24.8H, v24.8H, v0.8B
        uaddw           v25.8H, v25.8H, v1.8B
        uaddw           v26.8H, v26.8H, v2.8B
        sqxtun          v0.8B,  v24.8H
        uaddw           v27.8H, v27.8H, v3.8B
        sqxtun          v1.8B,  v25.8H
        uaddw           v28.8H, v28.8H, v4.8B
        sqxtun          v2.8B,  v26.8H
        st1             {v0.8B},     [x3], x2
        uaddw           v29.8H, v29.8H, v5.8B
        sqxtun          v3.8B,  v27.8H
        st1             {v1.8B},     [x3], x2
        uaddw           v30.8H, v30.8H, v6.8B
        sqxtun          v4.8B,  v28.8H
        st1             {v2.8B},     [x3], x2
        uaddw           v31.8H, v31.8H, v7.8B
        sqxtun          v5.8B,  v29.8H
        st1             {v3.8B},     [x3], x2
        sqxtun          v6.8B,  v30.8H
        sqxtun          v7.8B,  v31.8H
        st1             {v4.8B},     [x3], x2
        st1             {v5.8B},     [x3], x2
        st1             {v6.8B},     [x3], x2
        st1             {v7.8B},     [x3], x2

        sub             x1,  x1,  #128          // undo the eight post-incremented stores
        ret
endfunc

/*
 * ff_h264_idct8_dc_add_neon
 *
 * DC-only variant of the 8x8 case: (block[0] + 32) >> 6 is added to every
 * pixel of an 8x8 block with unsigned saturation.
 *
 * In:    x0 = destination pixels (8 rows of 8 bytes)
 *        x1 = coefficients; only the first int16 is read
 *        w2 = destination stride in bytes (signed, sign-extended here)
 * Out:   the int16 at [x1] is overwritten with zero; x1 is unchanged
 *        x0 = entry x0 + 8 * stride
 * Clobb: x2 (sign-extended), w3, v0-v7, v24-v31
 */
function ff_h264_idct8_dc_add_neon, export=1
.L_ff_h264_idct8_dc_add_neon:
        AARCH64_VALID_CALL_TARGET
        mov             w3,       #0
        sxtw            x2,       w2
        ld1r            {v31.8H}, [x1]          // broadcast block[0]
        strh            w3,       [x1]          // block[0] = 0
        ld1             {v0.8B},  [x0], x2
        srshr           v31.8H, v31.8H, #6
        ld1             {v1.8B},     [x0], x2
        ld1             {v2.8B},     [x0], x2
        uaddw           v24.8H, v31.8H, v0.8B
        ld1             {v3.8B},     [x0], x2
        uaddw           v25.8H, v31.8H, v1.8B
        ld1             {v4.8B},     [x0], x2
        uaddw           v26.8H, v31.8H, v2.8B
        ld1             {v5.8B},     [x0], x2
        uaddw           v27.8H, v31.8H, v3.8B
        ld1             {v6.8B},     [x0], x2
        uaddw           v28.8H, v31.8H, v4.8B
        ld1             {v7.8B},     [x0], x2
        uaddw           v29.8H, v31.8H, v5.8B
        uaddw           v30.8H, v31.8H, v6.8B
        uaddw           v31.8H, v31.8H, v7.8B   // last use of the DC value, so v31 can be overwritten
        sqxtun          v0.8B,  v24.8H
        sqxtun          v1.8B,  v25.8H
        sqxtun          v2.8B,  v26.8H
        sqxtun          v3.8B,  v27.8H
        sub             x0,  x0,  x2, lsl #3    // rewind x0 to row 0
        st1             {v0.8B},     [x0], x2
        sqxtun          v4.8B,  v28.8H
        st1             {v1.8B},     [x0], x2
        sqxtun          v5.8B,  v29.8H
        st1             {v2.8B},     [x0], x2
        sqxtun          v6.8B,  v30.8H
        st1             {v3.8B},     [x0], x2
        sqxtun          v7.8B,  v31.8H
        st1             {v4.8B},     [x0], x2
        st1             {v5.8B},     [x0], x2
        st1             {v6.8B},     [x0], x2
        st1             {v7.8B},     [x0], x2
        ret
endfunc

/*
 * ff_h264_idct8_add4_neon
 *
 * Walks four 8x8 coefficient blocks (128 bytes each).  Each iteration
 * advances scan8 by 4 entries and block_offset by 4 int32 entries, so block
 * k uses scan8[4*k] and block_offset[4*k].  With nnz = x4[scan8[4*k]]:
 *   nnz == 0                  -> block skipped
 *   nnz == 1 && block[0] != 0 -> ff_h264_idct8_dc_add_neon
 *   otherwise                 -> ff_h264_idct8_add_neon
 *
 * In:    x0 = dest base
 *        x1 = block_offset (int32 entries, added to dest)
 *        x2 = block (coefficients)
 *        w3 = stride
 *        x4 = byte array indexed through scan8
 * Clobb: x0-x3, x5-x7, x9-x15, flags, plus whatever the two callees clobber
 *
 * Unlike the 4x4 dispatchers, the stride is placed in w2 once before the
 * loop: the callees only sign-extend w2 into x2, which leaves w2 intact.
 */
function ff_h264_idct8_add4_neon, export=1
        mov             x12, x30
        mov             x6,  x0
        mov             x5,  x1
        mov             x1,  x2
        mov             w2,  w3
        movrel          x7,  scan8
        mov             w10, #16
        movrel          x13, .L_ff_h264_idct8_dc_add_neon
        movrel          x14, .L_ff_h264_idct8_add_neon
1:      ldrb            w9,  [x7], #4
        ldrsw           x0,  [x5], #16
        ldrb            w9,  [x4, w9, UXTW]
        subs            w9,  w9,  #1            // Z set <=> nnz == 1
        b.lt            2f                      // nnz == 0: nothing to do
        ldrsh           w11,  [x1]              // block[0]
        add             x0,  x6,  x0
        // nnz == 1: flags = cmp(block[0], 0); otherwise force Z=1 (nzcv #4)
        ccmp            w11, #0,  #4,  eq
        csel            x15, x13, x14, ne       // ne <=> nnz == 1 && block[0] != 0
        blr             x15
2:      subs            w10, w10, #4
        add             x1,  x1,  #128
        b.ne            1b
        ret             x12
endfunc

/*
 * scan8: byte table mapping a block index to an index into the array the
 * dispatchers receive in x4.  Each entry is written as column + row*8.
 * ff_h264_idct_add16_neon reads entries 0-15, ff_h264_idct8_add4_neon reads
 * entries 0, 4, 8 and 12, and ff_h264_idct_add8_neon reads entries 16-19
 * and 32-35.
 */
const   scan8
        .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8
        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8
endconst