/*
 * ARM NEON IDCT
 *
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2017 Matthieu Bouron <matthieu.bouron@gmail.com>
 *
 * Based on Simple IDCT
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Target: AArch64, GNU assembler syntax, run through the C preprocessor.
 * The function/endfunc, const/endconst and movrel helpers come from the
 * included asm.S.
 *
 * Register usage shared by every routine in this file:
 *   v0        the eight 16-bit coefficients of idct_coeff_neon (see z1..z4c)
 *   v24-v31   the sixteen-bit working data: written by idct_row4_neon,
 *             read by idct_col4_neon1 / idct_col4_neon2
 *   v5-v7, v16-v23  scratch
 *   x10       copy of the link register taken by idct_start, used by idct_end
 *   x3-x5     scratch
 * v8-v15 and x19-x28 are never written.
 */

#include "libavutil/aarch64/asm.S"

#define Z1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define Z2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define Z3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define Z4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define Z5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define Z6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define Z7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
// Column-pass rounding term: it is added to the first input before the
// multiplication by Z4, so the product carries about 1 << (COL_SHIFT - 1).
#define Z4c ((1<<(COL_SHIFT-1))/Z4)
#define ROW_SHIFT 11
#define COL_SHIFT 20

// Lane aliases into v0, in the same order as the .short list below.
#define z1 v0.H[0]
#define z2 v0.H[1]
#define z3 v0.H[2]
#define z4 v0.H[3]
#define z5 v0.H[4]
#define z6 v0.H[5]
#define z7 v0.H[6]
#define z4c v0.H[7]

const   idct_coeff_neon, align=4
        .short Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z4c
endconst

/*
 * idct_start data
 * Common entry sequence: prefetch the memory at the given pointer register,
 * keep the return address in x10 (the bl instructions issued later overwrite
 * x30) and load the eight coefficients into v0.
 * Clobbers: x3, x10, v0.
 */
.macro idct_start data
        prfm            pldl1keep, [\data]
        mov             x10, x30
        movrel          x3, idct_coeff_neon
        ld1             {v0.2D}, [x3]
.endm

/*
 * idct_end
 * Return to the address saved in x10 by idct_start.
 */
.macro idct_end
        ret             x10
.endm

/*
 * smull1 / smlal1 / smlsl1
 * Aliases of the plain (lower half) widening multiplies.  They let macros
 * append a half selector of 1 or 2 to the mnemonic: selector 1 resolves to
 * these aliases, selector 2 resolves to the native smull2/smlal2/smlsl2.
 */
.macro smull1 a, b, c
        smull           \a, \b, \c
.endm

.macro smlal1 a, b, c
        smlal           \a, \b, \c
.endm

.macro smlsl1 a, b, c
        smlsl           \a, \b, \c
.endm

/*
 * idct_col4_top y1, y2, y3, y4, i, l
 * First stage shared by the row and column passes, four 32-bit lanes wide.
 *   y2, y3, y4  register names of the inputs (y1 is accepted but not read
 *               here; its contribution must already be in v23)
 *   i           half selector, 1 or 2, appended to the multiply mnemonics
 *   l           arrangement suffix matching i (.4H or .8H)
 * In:  v23 = bias plus the y1 term.
 * Out: v19 = v23 + y3*z2      v20 = v23 + y3*z6
 *      v21 = v23 - y3*z6      v22 = v23 - y3*z2
 *      v17 = y2*z1 + y4*z3    v18 = y2*z3 - y4*z7
 *      v5  = y2*z5 - y4*z1    v6  = y2*z7 - y4*z5
 * Clobbers: v7, v16.
 */
.macro idct_col4_top y1, y2, y3, y4, i, l
        smull\i         v7.4S,  \y3\l, z2
        smull\i         v16.4S, \y3\l, z6
        smull\i         v17.4S, \y2\l, z1
        add             v19.4S, v23.4S, v7.4S
        smull\i         v18.4S, \y2\l, z3
        add             v20.4S, v23.4S, v16.4S
        smull\i         v5.4S,  \y2\l, z5
        sub             v21.4S, v23.4S, v16.4S
        smull\i         v6.4S,  \y2\l, z7
        sub             v22.4S, v23.4S, v7.4S

        smlal\i         v17.4S, \y4\l, z3
        smlsl\i         v18.4S, \y4\l, z7
        smlsl\i         v5.4S,  \y4\l, z1
        smlsl\i         v6.4S,  \y4\l, z5
.endm

/*
 * idct_row4_neon y1, y2, y3, y4, pass
 * Loads 64 bytes from [x2] (x2 is advanced by 64) into the four named
 * vector registers, runs the first pass on them and leaves the 16-bit
 * results, shifted right by ROW_SHIFT and transposed, in the same registers.
 *   y1..y4  vector register names without arrangement suffix
 *   pass    numeric local label used as the target of the early-out branch
 * The contribution of the upper 64 bits of the four inputs is skipped when
 * all of those bits are zero.
 * Clobbers: x3, v5-v7, v16-v23, condition flags.
 * NOTE(review): the coefficient layout expected in memory is not visible in
 * this file; confirm it against the permutation selected by the caller.
 */
.macro idct_row4_neon y1, y2, y3, y4, pass
        ld1             {\y1\().2D,\y2\().2D}, [x2], #32
        movi            v23.4S, #1<<2, lsl #8           // 1024 = 1 << (ROW_SHIFT - 1), rounding bias
        orr             v5.16B, \y1\().16B, \y2\().16B
        ld1             {\y3\().2D,\y4\().2D}, [x2], #32
        orr             v6.16B, \y3\().16B, \y4\().16B
        orr             v5.16B, v5.16B, v6.16B
        mov             x3, v5.D[1]                     // x3 = OR of the upper halves of all four inputs
        smlal           v23.4S, \y1\().4H, z4

        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H

        cmp             x3, #0
        b.eq            \pass\()f                       // upper halves all zero: nothing more to add

        smull2          v7.4S,  \y1\().8H, z4
        smlal2          v17.4S, \y2\().8H, z5
        smlsl2          v18.4S, \y2\().8H, z1
        smull2          v16.4S, \y3\().8H, z2
        smlal2          v5.4S,  \y2\().8H, z7
        add             v19.4S, v19.4S, v7.4S
        sub             v20.4S, v20.4S, v7.4S
        sub             v21.4S, v21.4S, v7.4S
        add             v22.4S, v22.4S, v7.4S
        smlal2          v6.4S,  \y2\().8H, z3
        smull2          v7.4S,  \y3\().8H, z6
        smlal2          v17.4S, \y4\().8H, z7
        smlsl2          v18.4S, \y4\().8H, z5
        smlal2          v5.4S,  \y4\().8H, z3
        smlsl2          v6.4S,  \y4\().8H, z1
        add             v19.4S, v19.4S, v7.4S
        sub             v20.4S, v20.4S, v16.4S
        add             v21.4S, v21.4S, v16.4S
        sub             v22.4S, v22.4S, v7.4S

        // Final butterflies: sums go to the low halves, differences to the
        // high halves, each narrowed to 16 bit with a shift by ROW_SHIFT.
\pass:  add             \y3\().4S, v19.4S, v17.4S
        add             \y4\().4S, v20.4S, v18.4S
        shrn            \y1\().4H, \y3\().4S, #ROW_SHIFT
        shrn            \y2\().4H, \y4\().4S, #ROW_SHIFT
        add             v7.4S,  v21.4S, v5.4S
        add             v16.4S, v22.4S, v6.4S
        shrn            \y3\().4H, v7.4S,  #ROW_SHIFT
        shrn            \y4\().4H, v16.4S, #ROW_SHIFT
        sub             v22.4S, v22.4S, v6.4S
        sub             v19.4S, v19.4S, v17.4S
        sub             v21.4S, v21.4S, v5.4S
        shrn2           \y1\().8H, v22.4S, #ROW_SHIFT
        sub             v20.4S, v20.4S, v18.4S
        shrn2           \y2\().8H, v21.4S, #ROW_SHIFT
        shrn2           \y3\().8H, v20.4S, #ROW_SHIFT
        shrn2           \y4\().8H, v19.4S, #ROW_SHIFT

        // Transpose: 16-bit pairs first, then 32-bit pairs.
        trn1            v16.8H, \y1\().8H, \y2\().8H
        trn2            v17.8H, \y1\().8H, \y2\().8H
        trn1            v18.8H, \y3\().8H, \y4\().8H
        trn2            v19.8H, \y3\().8H, \y4\().8H
        trn1            \y1\().4S, v16.4S, v18.4S
        trn1            \y2\().4S, v17.4S, v19.4S
        trn2            \y3\().4S, v16.4S, v18.4S
        trn2            \y4\().4S, v17.4S, v19.4S
.endm

/*
 * declare_idct_col4_neon i, l
 * Emits the file-local function idct_col4_neon1 (i = 1, l = .4H) or
 * idct_col4_neon2 (i = 2, l = .8H).  The function runs the second pass on
 * the lower (i = 1) or upper (i = 2) 64-bit halves of v24-v31.
 * In:  v24-v31 = working data, v0 = coefficients.
 * Out: v7, v16, v17, v18 = eight 16-bit lanes each, holding the upper
 *      16 bits of the 32-bit sums and differences (addhn/subhn).  The
 *      callers apply the remaining shift of COL_SHIFT - 16.
 * Each of v28, v29, v30, v31 is skipped when its selected 64-bit half is
 * zero.
 * Clobbers: x4, x5, v5, v6, v19-v23, condition flags.
 * Returns through x30, so it has to be reached with bl.
 */
.macro declare_idct_col4_neon i, l
function idct_col4_neon\i
        dup             v23.4H, z4c
.if \i == 1
        add             v23.4H, v23.4H, v24.4H
.else
        mov             v5.D[0], v24.D[1]               // bring the upper half of v24 down for the 4H add
        add             v23.4H, v23.4H, v5.4H
.endif
        smull           v23.4S, v23.4H, z4              // v23 = (first input + z4c) * z4

        idct_col4_top   v24, v25, v26, v27, \i, \l

        mov             x4, v28.D[\i - 1]
        mov             x5, v29.D[\i - 1]
        cmp             x4, #0
        b.eq            1f

        smull\i         v7.4S,  v28\l,  z4
        add             v19.4S, v19.4S, v7.4S
        sub             v20.4S, v20.4S, v7.4S
        sub             v21.4S, v21.4S, v7.4S
        add             v22.4S, v22.4S, v7.4S

1:      mov             x4, v30.D[\i - 1]
        cmp             x5, #0
        b.eq            2f

        smlal\i         v17.4S, v29\l, z5
        smlsl\i         v18.4S, v29\l, z1
        smlal\i         v5.4S,  v29\l, z7
        smlal\i         v6.4S,  v29\l, z3

2:      mov             x5, v31.D[\i - 1]
        cmp             x4, #0
        b.eq            3f

        smull\i         v7.4S,  v30\l, z6
        smull\i         v16.4S, v30\l, z2
        add             v19.4S, v19.4S, v7.4S
        sub             v22.4S, v22.4S, v7.4S
        sub             v20.4S, v20.4S, v16.4S
        add             v21.4S, v21.4S, v16.4S

3:      cmp             x5, #0
        b.eq            4f

        smlal\i         v17.4S, v31\l, z7
        smlsl\i         v18.4S, v31\l, z5
        smlal\i         v5.4S,  v31\l, z3
        smlsl\i         v6.4S,  v31\l, z1

4:      addhn           v7.4H,  v19.4S, v17.4S
        addhn2          v7.8H,  v20.4S, v18.4S
        subhn           v18.4H, v20.4S, v18.4S
        subhn2          v18.8H, v19.4S, v17.4S

        addhn           v16.4H, v21.4S, v5.4S
        addhn2          v16.8H, v22.4S, v6.4S
        subhn           v17.4H, v22.4S, v6.4S
        subhn2          v17.8H, v21.4S, v5.4S

        ret
endfunc
.endm

declare_idct_col4_neon 1, .4H
declare_idct_col4_neon 2, .8H

/*
 * ff_simple_idct_put_neon
 * In:  x0 = destination pointer; eight rows of 8 bytes are written
 *      x1 = value added to x0 after each row
 *      x2 = pointer to the 128 bytes of 16-bit input coefficients
 * The transformed values are saturated to unsigned 8 bit and stored.
 * Clobbers: x0, x2-x5, x10, v0-v7, v16-v31, condition flags.
 * NOTE(review): presumably called from C as (dest, line_size, block);
 * the prototype is not part of this file, confirm against the header.
 */
function ff_simple_idct_put_neon, export=1
        idct_start      x2

        idct_row4_neon  v24, v25, v26, v27, 1
        idct_row4_neon  v28, v29, v30, v31, 2
        bl              idct_col4_neon1

        // Finish the column shift and saturate to unsigned bytes.
        sqshrun         v1.8B,  v7.8H,  #COL_SHIFT-16
        sqshrun2        v1.16B, v16.8H, #COL_SHIFT-16
        sqshrun         v3.8B,  v17.8H, #COL_SHIFT-16
        sqshrun2        v3.16B, v18.8H, #COL_SHIFT-16

        bl              idct_col4_neon2

        sqshrun         v2.8B,  v7.8H,  #COL_SHIFT-16
        sqshrun2        v2.16B, v16.8H, #COL_SHIFT-16
        sqshrun         v4.8B,  v17.8H, #COL_SHIFT-16
        sqshrun2        v4.16B, v18.8H, #COL_SHIFT-16

        // Interleave the 4-byte groups of both halves so that every 64-bit
        // lane holds one complete 8-byte output row.
        zip1            v16.4S, v1.4S, v2.4S
        zip2            v17.4S, v1.4S, v2.4S

        st1             {v16.D}[0], [x0], x1
        st1             {v16.D}[1], [x0], x1

        zip1            v18.4S, v3.4S, v4.4S
        zip2            v19.4S, v3.4S, v4.4S

        st1             {v17.D}[0], [x0], x1
        st1             {v17.D}[1], [x0], x1
        st1             {v18.D}[0], [x0], x1
        st1             {v18.D}[1], [x0], x1
        st1             {v19.D}[0], [x0], x1
        st1             {v19.D}[1], [x0], x1

        idct_end
endfunc

/*
 * ff_simple_idct_add_neon
 * In:  x0 = destination pointer; eight rows of 8 bytes are read and rewritten
 *      x1 = value added to the pointer after each row
 *      x2 = pointer to the 128 bytes of 16-bit input coefficients
 * The transformed values are added to the existing bytes, saturated to
 * unsigned 8 bit and stored back to the same rows.
 * Clobbers: x0, x2-x5, x9, x10, v0-v7, v16-v31, condition flags.
 * NOTE(review): presumably called from C as (dest, line_size, block);
 * the prototype is not part of this file, confirm against the header.
 */
function ff_simple_idct_add_neon, export=1
        idct_start      x2

        idct_row4_neon  v24, v25, v26, v27, 1
        idct_row4_neon  v28, v29, v30, v31, 2
        bl              idct_col4_neon1

        sshr            v1.8H, v7.8H,  #COL_SHIFT-16
        sshr            v2.8H, v16.8H, #COL_SHIFT-16
        sshr            v3.8H, v17.8H, #COL_SHIFT-16
        sshr            v4.8H, v18.8H, #COL_SHIFT-16

        bl              idct_col4_neon2

        sshr            v7.8H,  v7.8H,  #COL_SHIFT-16
        sshr            v16.8H, v16.8H, #COL_SHIFT-16
        sshr            v17.8H, v17.8H, #COL_SHIFT-16
        sshr            v18.8H, v18.8H, #COL_SHIFT-16

        // x0 walks the rows for the loads, x9 (the original x0) walks the
        // same rows for the stores.  Loads, row assembly (zip), widening
        // add and saturating narrow are interleaved.
        mov             x9, x0
        ld1             {v19.D}[0], [x0], x1
        zip1            v23.2D, v1.2D, v7.2D
        zip2            v24.2D, v1.2D, v7.2D
        ld1             {v19.D}[1], [x0], x1
        zip1            v25.2D, v2.2D, v16.2D
        zip2            v26.2D, v2.2D, v16.2D
        ld1             {v20.D}[0], [x0], x1
        zip1            v27.2D, v3.2D, v17.2D
        zip2            v28.2D, v3.2D, v17.2D
        ld1             {v20.D}[1], [x0], x1
        zip1            v29.2D, v4.2D, v18.2D
        zip2            v30.2D, v4.2D, v18.2D
        ld1             {v21.D}[0], [x0], x1
        uaddw           v23.8H, v23.8H, v19.8B
        uaddw2          v24.8H, v24.8H, v19.16B
        ld1             {v21.D}[1], [x0], x1
        sqxtun          v23.8B,  v23.8H
        sqxtun2         v23.16B, v24.8H
        ld1             {v22.D}[0], [x0], x1
        uaddw           v24.8H, v25.8H, v20.8B
        uaddw2          v25.8H, v26.8H, v20.16B
        ld1             {v22.D}[1], [x0], x1
        sqxtun          v24.8B,  v24.8H
        sqxtun2         v24.16B, v25.8H
        st1             {v23.D}[0], [x9], x1
        uaddw           v25.8H, v27.8H, v21.8B
        uaddw2          v26.8H, v28.8H, v21.16B
        st1             {v23.D}[1], [x9], x1
        sqxtun          v25.8B,  v25.8H
        sqxtun2         v25.16B, v26.8H
        st1             {v24.D}[0], [x9], x1
        uaddw           v26.8H, v29.8H, v22.8B
        uaddw2          v27.8H, v30.8H, v22.16B
        st1             {v24.D}[1], [x9], x1
        sqxtun          v26.8B,  v26.8H
        sqxtun2         v26.16B, v27.8H
        st1             {v25.D}[0], [x9], x1
        st1             {v25.D}[1], [x9], x1
        st1             {v26.D}[0], [x9], x1
        st1             {v26.D}[1], [x9], x1

        idct_end
endfunc

/*
 * ff_simple_idct_neon
 * In:  x0 = pointer to 128 bytes of 16-bit coefficients, transformed in place
 * Clobbers: x2-x5, x10, v0-v7, v16-v31, condition flags.
 * NOTE(review): presumably called from C with the block pointer as the only
 * argument; the prototype is not part of this file.
 */
function ff_simple_idct_neon, export=1
        idct_start      x0

        mov             x2, x0
        idct_row4_neon  v24, v25, v26, v27, 1
        idct_row4_neon  v28, v29, v30, v31, 2
        sub             x2, x2, #128                    // rewind: the two row passes advanced x2 by 2 * 64
        bl              idct_col4_neon1

        sshr            v1.8H, v7.8H,  #COL_SHIFT-16
        sshr            v2.8H, v16.8H, #COL_SHIFT-16
        sshr            v3.8H, v17.8H, #COL_SHIFT-16
        sshr            v4.8H, v18.8H, #COL_SHIFT-16

        bl              idct_col4_neon2

        sshr            v7.8H,  v7.8H,  #COL_SHIFT-16
        sshr            v16.8H, v16.8H, #COL_SHIFT-16
        sshr            v17.8H, v17.8H, #COL_SHIFT-16
        sshr            v18.8H, v18.8H, #COL_SHIFT-16

        // Pair the 64-bit halves of both column calls and write the
        // 16-bit results back, 32 bytes at a time.
        zip1            v23.2D, v1.2D, v7.2D
        zip2            v24.2D, v1.2D, v7.2D
        st1             {v23.2D,v24.2D}, [x2], #32
        zip1            v25.2D, v2.2D, v16.2D
        zip2            v26.2D, v2.2D, v16.2D
        st1             {v25.2D,v26.2D}, [x2], #32
        zip1            v27.2D, v3.2D, v17.2D
        zip2            v28.2D, v3.2D, v17.2D
        st1             {v27.2D,v28.2D}, [x2], #32
        zip1            v29.2D, v4.2D, v18.2D
        zip2            v30.2D, v4.2D, v18.2D
        st1             {v29.2D,v30.2D}, [x2], #32

        idct_end
endfunc