/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * 4x4 inverse transform of one coefficient block, added to the destination.
 *
 * In:   r0 = destination pixels, 4 bytes per row, 32-bit aligned (":32")
 *       r1 = 16 signed 16-bit coefficients, 128-bit aligned (":128")
 *       r2 = destination stride in bytes
 * Out:  the 32-byte coefficient block is overwritten with zeros;
 *       r1 is restored to its entry value, r0 ends 4 rows past its entry
 *       value.
 * Uses: q0-q3, q8, q9, q15.
 *
 * The local label h264_idct_add_neon_nothumb is the target used by the
 * dispatch loops further down, which form the address with adr.
 */
function ff_h264_idct_add_neon, export=1
h264_idct_add_neon_nothumb:
        vld1.64         {d0-d3}, [r1,:128]
        vmov.i16        q15, #0

        vswp            d1, d2                  @ d0,d1 = vectors 0,2   d2,d3 = vectors 1,3
        vst1.16         {q15}, [r1,:128]!       @ clear first half of the block
        vadd.i16        d4, d0, d1              @ v0 + v2
        vst1.16         {q15}, [r1,:128]!       @ clear second half of the block
        vshr.s16        q8, q1, #1              @ d16 = v1 >> 1, d17 = v3 >> 1
        vsub.i16        d5, d0, d1              @ v0 - v2
        vadd.i16        d6, d2, d17             @ v1 + (v3 >> 1)
        vsub.i16        d7, d16, d3             @ (v1 >> 1) - v3
        vadd.i16        q0, q2, q3
        vsub.i16        q1, q2, q3

        @ transpose the 4x4 matrix of 16-bit values; its rows are held in
        @ register order d0, d1, d3, d2
        vtrn.16         d0, d1
        vtrn.16         d3, d2
        vtrn.32         d0, d3
        vtrn.32         d1, d2

        @ second 1-D pass, interleaved with loading the four destination rows.
        @ The pass leaves q0 = {row 0, row 2} and q1 = {row 3, row 1}, so the
        @ pixels are loaded into d18/d19 lanes in that same order.
        vadd.i16        d4, d0, d3
        vld1.32         {d18[0]}, [r0,:32], r2  @ row 0
        vswp            d1, d3
        vshr.s16        q8, q1, #1
        vld1.32         {d19[1]}, [r0,:32], r2  @ row 1
        vsub.i16        d5, d0, d1
        vld1.32         {d18[1]}, [r0,:32], r2  @ row 2
        vadd.i16        d6, d16, d3
        vld1.32         {d19[0]}, [r0,:32], r2  @ row 3
        vsub.i16        d7, d2, d17
        sub             r0, r0, r2, lsl #2      @ rewind r0 by 4 rows
        vadd.i16        q0, q2, q3
        vsub.i16        q1, q2, q3

        vrshr.s16       q0, q0, #6              @ rounding shift: (x + 32) >> 6
        vrshr.s16       q1, q1, #6

        vaddw.u8        q0, q0, d18             @ add the 8-bit pixels, widened to 16 bits
        vaddw.u8        q1, q1, d19

        vqmovun.s16     d0, q0                  @ saturate back to unsigned 8 bits
        vqmovun.s16     d1, q1

        vst1.32         {d0[0]}, [r0,:32], r2   @ row 0
        vst1.32         {d1[1]}, [r0,:32], r2   @ row 1
        vst1.32         {d0[1]}, [r0,:32], r2   @ row 2
        vst1.32         {d1[0]}, [r0,:32], r2   @ row 3

        sub             r1, r1, #32             @ undo the two post-increments of r1
        bx              lr
endfunc

/*
 * DC-only variant for a 4x4 block: the first coefficient, rounded and
 * shifted right by 6, is added to every pixel of the block.
 *
 * In:   r0 = destination pixels, 4 bytes per row, 32-bit aligned
 *       r1 = coefficient block; only the first 16-bit value is read
 *       r2 = destination stride in bytes
 * Out:  the first coefficient is overwritten with zero; r1 and r2 are
 *       unchanged, r0 ends 4 rows past its entry value.
 * Uses: r3, q0-q2.
 */
function ff_h264_idct_dc_add_neon, export=1
h264_idct_dc_add_neon_nothumb:
        mov             r3, #0
        vld1.16         {d2[],d3[]}, [r1,:16]   @ replicate coefficient 0 into all lanes of q1
        strh            r3, [r1]                @ clear coefficient 0
        vrshr.s16       q1, q1, #6
        vld1.32         {d0[0]}, [r0,:32], r2   @ rows 0,1 -> d0
        vld1.32         {d0[1]}, [r0,:32], r2
        vaddw.u8        q2, q1, d0
        vld1.32         {d1[0]}, [r0,:32], r2   @ rows 2,3 -> d1
        vld1.32         {d1[1]}, [r0,:32], r2
        vaddw.u8        q1, q1, d1
        vqmovun.s16     d0, q2
        vqmovun.s16     d1, q1
        sub             r0, r0, r2, lsl #2      @ rewind r0 by 4 rows
        vst1.32         {d0[0]}, [r0,:32], r2
        vst1.32         {d0[1]}, [r0,:32], r2
        vst1.32         {d1[0]}, [r0,:32], r2
        vst1.32         {d1[1]}, [r0,:32], r2
        bx              lr
endfunc

/*
 * Run the 4x4 transform over 16 consecutive coefficient blocks.
 *
 * In:   r0   = destination base pointer
 *       r1   = table of 32-bit offsets, one per block, added to r0
 *       r2   = coefficients, 32 bytes per block
 *       r3   = destination stride in bytes
 *       [sp] = byte table indexed through scan8 (NOTE(review): presumably
 *              the per-block non-zero coefficient counts; confirm against
 *              the C prototype)
 *
 * For each block i, with n = table[scan8[i]]:
 *       n == 0                        -> block skipped
 *       n == 1 and coefficient 0 != 0 -> DC-only routine
 *       otherwise                     -> full 4x4 routine
 *
 * The loop relies on the called routines leaving r1, r2 and ip unchanged.
 * CONFIG_THUMB is defined outside this file; adding it to the adr target
 * presumably sets the Thumb bit for blx in Thumb builds (TODO confirm).
 */
function ff_h264_idct_add16_neon, export=1
        push            {r4-r8,lr}
        mov             r4, r0                  @ r4 = destination base
        mov             r5, r1                  @ r5 = offset table cursor
        mov             r1, r2                  @ r1 = current coefficient block
        mov             r2, r3                  @ r2 = stride, as the callees expect
        ldr             r6, [sp, #24]           @ first stack argument (6 registers pushed)
        movrel          r7, scan8
        mov             ip, #16                 @ blocks remaining
1:      ldrb            r8, [r7], #1            @ r8 = scan8[i]
        ldr             r0, [r5], #4            @ r0 = offset of block i
        ldrb            r8, [r6, r8]            @ r8 = n
        subs            r8, r8, #1
        blt             2f                      @ n == 0: nothing to do
        ldrsh           lr, [r1]                @ coefficient 0 (flags are not affected)
        add             r0, r0, r4
        it              ne
        movne           lr, #0                  @ n != 1: force the full transform
        cmp             lr, #0
        ite             ne
        adrne           lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
        adreq           lr, h264_idct_add_neon_nothumb + CONFIG_THUMB
        blx             lr
2:      subs            ip, ip, #1
        add             r1, r1, #32             @ next coefficient block
        bne             1b
        pop             {r4-r8,pc}
endfunc

/*
 * Same register interface and traversal as ff_h264_idct_add16_neon, with a
 * different selection rule. For each block i, with n = table[scan8[i]]:
 *       n != 0                        -> full 4x4 routine
 *       n == 0 and coefficient 0 != 0 -> DC-only routine
 *       n == 0 and coefficient 0 == 0 -> block skipped
 */
function ff_h264_idct_add16intra_neon, export=1
        push            {r4-r8,lr}
        mov             r4, r0                  @ r4 = destination base
        mov             r5, r1                  @ r5 = offset table cursor
        mov             r1, r2                  @ r1 = current coefficient block
        mov             r2, r3                  @ r2 = stride
        ldr             r6, [sp, #24]           @ first stack argument (6 registers pushed)
        movrel          r7, scan8
        mov             ip, #16                 @ blocks remaining
1:      ldrb            r8, [r7], #1
        ldr             r0, [r5], #4
        ldrb            r8, [r6, r8]            @ r8 = n
        add             r0, r0, r4
        cmp             r8, #0
        ldrsh           r8, [r1]                @ r8 = coefficient 0 (flags are not affected)
        iteet           ne
        adrne           lr, h264_idct_add_neon_nothumb + CONFIG_THUMB
        adreq           lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
        cmpeq           r8, #0                  @ only when n == 0: test coefficient 0
        blxne           lr                      @ sees the flags of whichever cmp ran last
        subs            ip, ip, #1
        add             r1, r1, #32
        bne             1b
        pop             {r4-r8,pc}
endfunc

/*
 * Run the 4x4 transform over two groups of four blocks, one group per
 * destination plane, using the same selection rule as
 * ff_h264_idct_add16intra_neon.
 *
 * In:   r0   = pointer to two destination base pointers
 *       r1   = table of 32-bit offsets; entries 16.. are used
 *       r2   = coefficients; the data starting at byte offset 16*32 is used
 *       r3   = destination stride in bytes
 *       [sp] = byte table indexed through scan8
 *
 * r12 takes the values 0..3 (first destination), then 16..19 (second
 * destination). It indexes scan8+16, the offset table and the coefficient
 * area alike, so the second group uses scan8[32..35], offset entries
 * 32..35 and the coefficients 16 blocks after the first group.
 */
function ff_h264_idct_add8_neon, export=1
        push            {r4-r10,lr}
        ldm             r0, {r4,r9}             @ r4 = first destination, r9 = second
        add             r5, r1, #16*4           @ skip 16 offset entries
        add             r1, r2, #16*32          @ skip 16 coefficient blocks
        mov             r2, r3                  @ r2 = stride
        mov             r10, r1                 @ r10 = coefficient base for this function
        ldr             r6, [sp, #32]           @ first stack argument (8 registers pushed)
        movrel          r7, scan8+16
        mov             r12, #0
1:      ldrb            r8, [r7, r12]
        ldr             r0, [r5, r12, lsl #2]
        ldrb            r8, [r6, r8]            @ r8 = n
        add             r0, r0, r4
        add             r1, r10, r12, lsl #5    @ r1 = coefficient base + index * 32
        cmp             r8, #0
        ldrsh           r8, [r1]                @ r8 = coefficient 0 (flags are not affected)
        iteet           ne
        adrne           lr, h264_idct_add_neon_nothumb + CONFIG_THUMB
        adreq           lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
        cmpeq           r8, #0
        blxne           lr
        add             r12, r12, #1
        cmp             r12, #4
        itt             eq
        moveq           r12, #16                @ after 4 blocks jump to index 16 ...
        moveq           r4, r9                  @ ... and switch to the second destination
        cmp             r12, #20
        blt             1b
        pop             {r4-r10,pc}
endfunc

/*
 * One 1-D pass of the 8x8 transform over eight vectors of eight 16-bit
 * values. It is expanded twice by ff_h264_idct8_add_neon.
 *
 * pass 0: in   q8-q13 = vectors 0-5; vectors 6-7 are loaded here from [r1]
 *              into q14-q15 and their memory is overwritten with q3, which
 *              must hold zero on entry; r1 advances by 32 bytes
 *         out  q8-q13, q2 and q15 (q2 stands in for q14), with the
 *              vtrn.16 step of the transpose already applied to three of
 *              the four register pairs
 * pass 1: in   the registers left by pass 0; the transpose is completed
 *              first (remaining vtrn.16, then vtrn.32 and vswp)
 *         out  q8-q15, consumed by the caller as rows 0-7
 *
 * qa and qb are register aliases that differ between the two passes because
 * the 7th vector lives in q14 in pass 0 and in q2 in pass 1.
 * Uses q0-q3 as scratch.
 */
.macro  idct8x8_cols    pass
  .if \pass == 0
        qa      .req    q2
        qb      .req    q14
        vshr.s16        q2, q10, #1
        vadd.i16        q0, q8, q12
        vld1.16         {q14-q15},[r1,:128]
        vst1.16         {q3}, [r1,:128]!
        vst1.16         {q3}, [r1,:128]!
        vsub.i16        q1, q8, q12
        vshr.s16        q3, q14, #1
        vsub.i16        q2, q2, q14
        vadd.i16        q3, q3, q10
  .else
        qa      .req    q14
        qb      .req    q2
        vtrn.32         q8, q10
        vtrn.16         q12, q13
        vtrn.32         q9, q11
        vtrn.32         q12, q2
        vtrn.32         q13, q15
        vswp            d21, d4
        vshr.s16        q14, q10, #1
        vswp            d17, d24
        vshr.s16        q3, q2, #1
        vswp            d19, d26
        vadd.i16        q0, q8, q12
        vswp            d23, d30
        vsub.i16        q1, q8, q12
        vsub.i16        q14, q14, q2
        vadd.i16        q3, q3, q10
  .endif
        vadd.i16        q10, q1, qa
        vsub.i16        q12, q1, qa
        vadd.i16        q8, q0, q3
        vsub.i16        qb, q0, q3
        vsub.i16        q0, q13, q11
        vadd.i16        q1, q15, q9
        vsub.i16        qa, q15, q9
        vadd.i16        q3, q13, q11
        vsub.i16        q0, q0, q15
        vsub.i16        q1, q1, q11
        vadd.i16        qa, qa, q13
        vadd.i16        q3, q3, q9
        vshr.s16        q9, q9, #1
        vshr.s16        q11, q11, #1
        vshr.s16        q13, q13, #1
        vshr.s16        q15, q15, #1
        vsub.i16        q0, q0, q15
        vsub.i16        q1, q1, q11
        vadd.i16        qa, qa, q13
        vadd.i16        q3, q3, q9
        vshr.s16        q9, q0, #2
        vshr.s16        q11, q1, #2
        vshr.s16        q13, qa, #2
        vshr.s16        q15, q3, #2
        vsub.i16        q3, q3, q9
        vsub.i16        qa, q11, qa
        vadd.i16        q1, q1, q13
        vadd.i16        q0, q0, q15
  .if \pass == 0
        vsub.i16        q15, q8, q3
        vadd.i16        q8, q8, q3
        vadd.i16        q9, q10, q2
        vsub.i16        q2, q10, q2
        vtrn.16         q8, q9
        vadd.i16        q10, q12, q1
        vtrn.16         q2, q15
        vadd.i16        q11, q14, q0
        vsub.i16        q13, q12, q1
        vtrn.16         q10, q11
        vsub.i16        q12, q14, q0
  .else
        vsub.i16        q15, q8, q3
        vadd.i16        q8, q8, q3
        vadd.i16        q9, q10, q14
        vsub.i16        q14, q10, q14
        vadd.i16        q10, q12, q1
        vsub.i16        q13, q12, q1
        vadd.i16        q11, q2, q0
        vsub.i16        q12, q2, q0
  .endif
        .unreq          qa
        .unreq          qb
.endm

/*
 * 8x8 inverse transform of one coefficient block, added to the destination.
 *
 * In:   r0 = destination pixels, 8 bytes per row, 64-bit aligned (":64")
 *       r1 = 64 signed 16-bit coefficients, 128-bit aligned
 *       r2 = destination stride in bytes
 * Out:  the 128-byte coefficient block is overwritten with zeros;
 *       r1 is restored to its entry value, r0 ends 8 rows past its entry
 *       value.
 * Uses: r3, q0-q3, q8-q15.
 */
function ff_h264_idct8_add_neon, export=1
h264_idct8_add_neon_nothumb:
        vmov.i16        q3, #0                  @ zero source; pass 0 of the macro needs it too
        vld1.16         {q8-q9}, [r1,:128]
        vst1.16         {q3}, [r1,:128]!
        vst1.16         {q3}, [r1,:128]!
        vld1.16         {q10-q11},[r1,:128]
        vst1.16         {q3}, [r1,:128]!
        vst1.16         {q3}, [r1,:128]!
        vld1.16         {q12-q13},[r1,:128]
        vst1.16         {q3}, [r1,:128]!
        vst1.16         {q3}, [r1,:128]!

        idct8x8_cols    0                       @ also loads and clears the last 32 bytes
        idct8x8_cols    1

        mov             r3, r0                  @ r0 reads the pixel rows, r3 writes them back
        vrshr.s16       q8, q8, #6
        vld1.8          {d0}, [r0,:64], r2
        vrshr.s16       q9, q9, #6
        vld1.8          {d1}, [r0,:64], r2
        vrshr.s16       q10, q10, #6
        vld1.8          {d2}, [r0,:64], r2
        vrshr.s16       q11, q11, #6
        vld1.8          {d3}, [r0,:64], r2
        vrshr.s16       q12, q12, #6
        vld1.8          {d4}, [r0,:64], r2
        vrshr.s16       q13, q13, #6
        vld1.8          {d5}, [r0,:64], r2
        vrshr.s16       q14, q14, #6
        vld1.8          {d6}, [r0,:64], r2
        vrshr.s16       q15, q15, #6
        vld1.8          {d7}, [r0,:64], r2
        vaddw.u8        q8, q8, d0
        vaddw.u8        q9, q9, d1
        vaddw.u8        q10, q10, d2
        vqmovun.s16     d0, q8
        vaddw.u8        q11, q11, d3
        vqmovun.s16     d1, q9
        vaddw.u8        q12, q12, d4
        vqmovun.s16     d2, q10
        vst1.8          {d0}, [r3,:64], r2
        vaddw.u8        q13, q13, d5
        vqmovun.s16     d3, q11
        vst1.8          {d1}, [r3,:64], r2
        vaddw.u8        q14, q14, d6
        vqmovun.s16     d4, q12
        vst1.8          {d2}, [r3,:64], r2
        vaddw.u8        q15, q15, d7
        vqmovun.s16     d5, q13
        vst1.8          {d3}, [r3,:64], r2
        vqmovun.s16     d6, q14
        vqmovun.s16     d7, q15
        vst1.8          {d4}, [r3,:64], r2
        vst1.8          {d5}, [r3,:64], r2
        vst1.8          {d6}, [r3,:64], r2
        vst1.8          {d7}, [r3,:64], r2

        sub             r1, r1, #128            @ undo the eight 16-byte post-increments
        bx              lr
endfunc

/*
 * DC-only variant for an 8x8 block: the first coefficient, rounded and
 * shifted right by 6, is added to every pixel of the block.
 *
 * In:   r0 = destination pixels, 8 bytes per row, 64-bit aligned
 *       r1 = coefficient block; only the first 16-bit value is read
 *       r2 = destination stride in bytes
 * Out:  the first coefficient is overwritten with zero; r1 and r2 are
 *       unchanged, r0 ends 8 rows past its entry value.
 * Uses: r3, q0-q3, q8-q15.
 */
function ff_h264_idct8_dc_add_neon, export=1
h264_idct8_dc_add_neon_nothumb:
        mov             r3, #0
        vld1.16         {d30[],d31[]},[r1,:16]  @ replicate coefficient 0 into all lanes of q15
        strh            r3, [r1]                @ clear coefficient 0
        vld1.32         {d0}, [r0,:64], r2
        vrshr.s16       q15, q15, #6
        vld1.32         {d1}, [r0,:64], r2
        vld1.32         {d2}, [r0,:64], r2
        vaddw.u8        q8, q15, d0
        vld1.32         {d3}, [r0,:64], r2
        vaddw.u8        q9, q15, d1
        vld1.32         {d4}, [r0,:64], r2
        vaddw.u8        q10, q15, d2
        vld1.32         {d5}, [r0,:64], r2
        vaddw.u8        q11, q15, d3
        vld1.32         {d6}, [r0,:64], r2
        vaddw.u8        q12, q15, d4
        vld1.32         {d7}, [r0,:64], r2
        vaddw.u8        q13, q15, d5
        vaddw.u8        q14, q15, d6
        vaddw.u8        q15, q15, d7            @ last use of the DC value, so q15 can be reused
        vqmovun.s16     d0, q8
        vqmovun.s16     d1, q9
        vqmovun.s16     d2, q10
        vqmovun.s16     d3, q11
        sub             r0, r0, r2, lsl #3      @ rewind r0 by 8 rows
        vst1.32         {d0}, [r0,:64], r2
        vqmovun.s16     d4, q12
        vst1.32         {d1}, [r0,:64], r2
        vqmovun.s16     d5, q13
        vst1.32         {d2}, [r0,:64], r2
        vqmovun.s16     d6, q14
        vst1.32         {d3}, [r0,:64], r2
        vqmovun.s16     d7, q15
        vst1.32         {d4}, [r0,:64], r2
        vst1.32         {d5}, [r0,:64], r2
        vst1.32         {d6}, [r0,:64], r2
        vst1.32         {d7}, [r0,:64], r2
        bx              lr
endfunc

/*
 * Run the 8x8 transform over four coefficient blocks.
 *
 * Same register interface and selection rule as ff_h264_idct_add16_neon,
 * but each iteration advances by four scan8 entries, four offset-table
 * entries (16 bytes) and 128 bytes of coefficients, and dispatches to the
 * 8x8 routines. For each block, with n = table[scan8[4*i]]:
 *       n == 0                        -> block skipped
 *       n == 1 and coefficient 0 != 0 -> DC-only 8x8 routine
 *       otherwise                     -> full 8x8 routine
 */
function ff_h264_idct8_add4_neon, export=1
        push            {r4-r8,lr}
        mov             r4, r0                  @ r4 = destination base
        mov             r5, r1                  @ r5 = offset table cursor
        mov             r1, r2                  @ r1 = current coefficient block
        mov             r2, r3                  @ r2 = stride
        ldr             r6, [sp, #24]           @ first stack argument (6 registers pushed)
        movrel          r7, scan8
        mov             r12, #16                @ counts down in steps of 4
1:      ldrb            r8, [r7], #4
        ldr             r0, [r5], #16
        ldrb            r8, [r6, r8]            @ r8 = n
        subs            r8, r8, #1
        blt             2f                      @ n == 0: nothing to do
        ldrsh           lr, [r1]                @ coefficient 0 (flags are not affected)
        add             r0, r0, r4
        it              ne
        movne           lr, #0                  @ n != 1: force the full transform
        cmp             lr, #0
        ite             ne
        adrne           lr, h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB
        adreq           lr, h264_idct8_add_neon_nothumb + CONFIG_THUMB
        blx             lr
2:      subs            r12, r12, #4
        add             r1, r1, #128            @ next 8x8 coefficient block
        bne             1b
        pop             {r4-r8,pc}
endfunc

/*
 * Byte lookup table: entry i is the index, written as x + y*8, used by the
 * dispatch loops above to read the per-block byte table passed on the stack.
 * 48 entries; ff_h264_idct_add8_neon reads it starting at offset 16.
 */
const   scan8
        .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8
        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8
endconst