;*****************************************************************************
;* MMX/SSE2/AVX-optimized 10-bit H.264 deblocking code
;*****************************************************************************
;* Copyright (C) 2005-2011 x264 project
;*
;* Authors: Oskar Arvidsson <oskar@irock.se>
;*          Loren Merritt <lorenm@u.washington.edu>
;*          Fiona Glaser <fiona@x264.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

cextern pw_2
cextern pw_3
cextern pw_4
cextern pw_1023
%define pw_pixel_max pw_1023

; Absolute difference minus a threshold, per unsigned word lane.
; The two saturating subtractions leave |%1-%2| in exactly one of the
; operands (the other saturates to 0), so OR-ing them gives |%1-%2|.
; out: %4 = |%1-%2|-%3
; clobbers: %5
%macro ABS_SUB 5
    psubusw     %5, %2, %1
    psubusw     %4, %1, %2
    por         %4, %5
    psubw       %4, %3
%endmacro

; Per-lane comparison mask (all ones where the condition holds).
; out: %4 = |%1-%2|<%3
; clobbers: %5
%macro DIFF_LT 5
    psubusw     %4, %2, %1
    psubusw     %5, %1, %2
    por         %5, %4 ; |%1-%2|
    pxor        %4, %4
    psubw       %5, %3 ; |%1-%2|-%3
    pcmpgtw     %4, %5 ; 0 > |%1-%2|-%3
%endmacro

; in:  %3, %4 = 32-bit GPRs (alpha, beta at the call sites in this file)
; out: %1, %2 = those values passed through SPLATW (x86util word broadcast)
%macro LOAD_AB 4
    movd        %1, %3
    movd        %2, %4
    SPLATW      %1, %1
    SPLATW      %2, %2
%endmacro

; in:  %2=tc reg
; out: %1=splatted tc
; punpcklbw with itself puts each tc0 byte in both halves of a word, so the
; arithmetic shift by 6 yields tc0<<2 for small non-negative tc0 and keeps
; negative tc0 negative.
; MMX: word 0 is broadcast to all 4 lanes (one tc0 entry per iteration).
; XMM: lanes 0-3 get entry 0 and lanes 4-7 get entry 1.
%macro LOAD_TC 2
    movd        %1, [%2]
    punpcklbw   %1, %1
%if mmsize == 8
    pshufw      %1, %1, 0
%else
    pshuflw     %1, %1, 01010000b
    pshufd      %1, %1, 01010000b
%endif
    psraw       %1, 6
%endmacro

; in: %1=p1, %2=p0, %3=q0, %4=q1
;     %5=alpha, %6=beta, %7-%9=tmp
; out: %7=mask
; The three (difference - threshold) words are AND-ed together; the sign bit
; of the result is set only if all three are negative, which the final
; pcmpgtw against zero turns into an all-ones lane.
%macro LOAD_MASK 9
    ABS_SUB     %2, %3, %5, %8, %7 ; |p0-q0| - alpha
    ABS_SUB     %1, %2, %6, %9, %7 ; |p1-p0| - beta
    pand        %8, %9
    ABS_SUB     %3, %4, %6, %9, %7 ; |q1-q0| - beta
    pxor        %7, %7
    pand        %8, %9
    pcmpgtw     %7, %8
%endmacro

; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
; clobbers: %3 (holds the clipped delta on exit), %6, %7
%macro DEBLOCK_P0_Q0 7
    psubw       %3, %4
    pxor        %7, %7
    paddw       %3, [pw_4]
    psubw       %7, %5              ; %7 = -%5 (lower clip bound)
    psubw       %6, %2, %1
    psllw       %6, 2
    paddw       %3, %6
    psraw       %3, 3               ; ((q0-p0)*4 + (p1-q1) + 4) >> 3
    mova        %6, [pw_pixel_max]
    CLIPW       %3, %7, %5
    pxor        %7, %7
    paddw       %1, %3
    psubw       %2, %3
    CLIPW       %1, %7, %6          ; keep p0' within [0, pw_pixel_max]
    CLIPW       %2, %7, %6          ; keep q0' within [0, pw_pixel_max]
%endmacro

; in: %1=x2, %2=x1, %3=p0, %4=q0 %5=mask&tc, %6=tmp
; out: %1 = x1 + clip(((x2 + ((p0+q0+1)>>1)) >> 1) - x1, -%5, %5)
%macro LUMA_Q1 6
    pavgw       %6, %3, %4      ; (p0+q0+1)>>1
    paddw       %1, %6
    pxor        %6, %6
    psraw       %1, 1
    psubw       %6, %5
    psubw       %1, %2
    CLIPW       %1, %6, %5
    paddw       %1, %2
%endmacro

; Filters one outer sample (p1 or q1) for the x86-32 code paths.
; in:  m5=x2 (p2 or q2), %1=x0 (p0 or q0), %2=x1 (p1 or q1),
;      m1=p0, m2=q0, m7=mask, tcm/bm = stack slots defined by the caller
; out: m5=x1', %3 = |x2-x0| < beta mask (saved for the p0/q0 tc adjustment)
; clobbers: m4, m6
%macro LUMA_DEBLOCK_ONE 3
    DIFF_LT     m5, %1, bm, m4, m6
    pxor        m6, m6
    mova        %3, m4
    pcmpgtw     m6, tcm             ; lanes where tc < 0
    pand        m4, tcm
    pandn       m6, m7              ; mask & !(tc < 0)
    pand        m4, m6
    LUMA_Q1 m5, %2, m1, m2, m4, m6
%endmacro

; Stores m0..m3 as 4-sample rows at column offset -4 bytes from the edge.
; MMX: one row per register (4 rows). XMM: low/high halves of each register
; go to two different rows (8 rows); %1 = second row base, %2 = row offset
; register (pix+3*stride and 3*stride at the call sites in this file).
%macro LUMA_H_STORE 2
%if mmsize == 8
    movq        [r0-4], m0
    movq        [r0+r1-4], m1
    movq        [r0+r1*2-4], m2
    movq        [r0+%2-4], m3
%else
    movq        [r0-4], m0
    movhps      [r0+r1-4], m0
    movq        [r0+r1*2-4], m1
    movhps      [%1-4], m1
    movq        [%1+r1-4], m2
    movhps      [%1+r1*2-4], m2
    movq        [%1+%2-4], m3
    movhps      [%1+r1*4-4], m3
%endif
%endmacro

%macro DEBLOCK_LUMA 0
;-----------------------------------------------------------------------------
; void ff_deblock_v_luma_10(uint16_t *pix, int stride, int alpha, int beta,
;                           int8_t *tc0)
;-----------------------------------------------------------------------------
cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
    %assign pad 5*mmsize+12-(stack_offset&15)
    %define tcm [rsp]
    %define ms1 [rsp+mmsize]
    %define ms2 [rsp+mmsize*2]
    %define am  [rsp+mmsize*3]
    %define bm  [rsp+mmsize*4]
    SUB         rsp, pad
    shl         r2d, 2              ; alpha <<= 2
    shl         r3d, 2              ; beta  <<= 2
    LOAD_AB     m4, m5, r2d, r3d
    mov         r3, 32/mmsize       ; loop count: 32 bytes of row per call
    mov         r2, r0              ; r2 = pix
    sub         r0, r1
    mova        am, m4
    sub         r0, r1
    mova        bm, m5
    sub         r0, r1              ; r0 = pix - 3*stride
.loop:
    mova        m0, [r0+r1]         ; p1
    mova        m1, [r0+r1*2]       ; p0
    mova        m2, [r2]            ; q0
    mova        m3, [r2+r1]         ; q1

    LOAD_MASK   m0, m1, m2, m3, am, bm, m7, m4, m6
    LOAD_TC     m6, r4
    mova        tcm, m6

    mova        m5, [r0]            ; p2
    LUMA_DEBLOCK_ONE m1, m0, ms1
    mova        [r0+r1], m5         ; store p1'

    mova        m5, [r2+r1*2]       ; q2
    LUMA_DEBLOCK_ONE m2, m3, ms2
    mova        [r2+r1], m5         ; store q1'

    pxor        m5, m5
    mova        m6, tcm
    pcmpgtw     m5, tcm
    psubw       m6, ms1             ; ms1/ms2 lanes are 0 or -1, so this
    pandn       m5, m7              ; adds 1 to tc per satisfied condition
    psubw       m6, ms2
    pand        m5, m6
    DEBLOCK_P0_Q0 m1, m2, m0, m3, m5, m7, m6
    mova        [r0+r1*2], m1
    mova        [r2], m2

    add         r0, mmsize
    add         r2, mmsize
    add         r4, mmsize/8
    dec         r3
    jg .loop
    ADD         rsp, pad
    RET

cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
    %assign pad 7*mmsize+12-(stack_offset&15)
    %define tcm [rsp]
    %define ms1 [rsp+mmsize]
    %define ms2 [rsp+mmsize*2]
    %define p1m [rsp+mmsize*3]
    %define p2m [rsp+mmsize*4]
    %define am  [rsp+mmsize*5]
    %define bm  [rsp+mmsize*6]
    SUB         rsp, pad
    shl         r2d, 2
    shl         r3d, 2
    LOAD_AB     m4, m5, r2d, r3d
    mov         r3, r1
    mova        am, m4
    add         r3, r1
    mov         r5, 32/mmsize
    mova        bm, m5
    add         r3, r1              ; r3 = 3*stride
%if mmsize == 16
    mov         r2, r0
    add         r2, r3              ; r2 = pix + 3*stride
%endif
.loop:
%if mmsize == 8
    movq        m2, [r0-8]     ; y q2 q1 q0
    movq        m7, [r0+0]
    movq        m5, [r0+r1-8]
    movq        m3, [r0+r1+0]
    movq        m0, [r0+r1*2-8]
    movq        m6, [r0+r1*2+0]
    movq        m1, [r0+r3-8]
    TRANSPOSE4x4W 2, 5, 0, 1, 4
    SWAP        2, 7
    movq        m7, [r0+r3]
    TRANSPOSE4x4W 2, 3, 6, 7, 4
%else
    movu        m5, [r0-8]     ; y q2 q1 q0 p0 p1 p2 x
    movu        m0, [r0+r1-8]
    movu        m2, [r0+r1*2-8]
    movu        m3, [r2-8]
    TRANSPOSE4x4W 5, 0, 2, 3, 6
    mova        tcm, m3             ; tcm slot reused as a temporary spill

    movu        m4, [r2+r1-8]
    movu        m1, [r2+r1*2-8]
    movu        m3, [r2+r3-8]
    movu        m7, [r2+r1*4-8]
    TRANSPOSE4x4W 4, 1, 3, 7, 6

    mova        m6, tcm
    punpcklqdq  m6, m7
    punpckhqdq  m5, m4
    SBUTTERFLY qdq, 0, 1, 7
    SBUTTERFLY qdq, 2, 3, 7
%endif

    mova        p2m, m6
    LOAD_MASK   m0, m1, m2, m3, am, bm, m7, m4, m6
    LOAD_TC     m6, r4
    mova        tcm, m6

    LUMA_DEBLOCK_ONE m1, m0, ms1
    mova        p1m, m5

    mova        m5, p2m
    LUMA_DEBLOCK_ONE m2, m3, ms2
    mova        p2m, m5

    pxor        m5, m5
    mova        m6, tcm
    pcmpgtw     m5, tcm
    psubw       m6, ms1
    pandn       m5, m7
    psubw       m6, ms2
    pand        m5, m6
    DEBLOCK_P0_Q0 m1, m2, m0, m3, m5, m7, m6
    mova        m0, p1m
    mova        m3, p2m
    TRANSPOSE4x4W 0, 1, 2, 3, 4
    LUMA_H_STORE r2, r3

    add         r4, mmsize/8
    lea         r0, [r0+r1*(mmsize/2)]
    lea         r2, [r2+r1*(mmsize/2)]
    dec         r5
    jg .loop
    ADD         rsp, pad
    RET
%endmacro

%if ARCH_X86_64
; in:  m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
;      m12=alpha, m13=beta
; out: m0=p1', m3=q1', m1=p0', m2=q0'
; clobbers: m4, m5, m6, m7, m10, m11, m14
%macro DEBLOCK_LUMA_INTER_SSE2 0
    LOAD_MASK   m0, m1, m2, m3, m12, m13, m7, m4, m6
    LOAD_TC     m6, r4
    DIFF_LT     m8, m1, m13, m10, m4
    DIFF_LT     m9, m2, m13, m11, m4
    pand        m6, m7

    mova        m14, m6
    pxor        m4, m4
    pcmpgtw     m6, m4
    pand        m6, m14

    mova        m5, m10
    pand        m5, m6
    LUMA_Q1 m8, m0, m1, m2, m5, m4

    mova        m5, m11
    pand        m5, m6
    LUMA_Q1 m9, m3, m1, m2, m5, m4

    pxor        m4, m4
    psubw       m6, m10
    pcmpgtw     m4, m14
    pandn       m4, m7
    psubw       m6, m11
    pand        m4, m6
    DEBLOCK_P0_Q0 m1, m2, m0, m3, m4, m5, m6

    SWAP        0, 8                ; filtered p1 was produced in m8
    SWAP        3, 9                ; filtered q1 was produced in m9
%endmacro

%macro DEBLOCK_LUMA_64 0
cglobal deblock_v_luma_10, 5,5,15
    %define p2 m8
    %define p1 m0
    %define p0 m1
    %define q0 m2
    %define q1 m3
    %define q2 m9
    %define mask0 m7
    %define mask1 m10
    %define mask2 m11
    shl         r2d, 2
    shl         r3d, 2
    LOAD_AB     m12, m13, r2d, r3d
    mov         r2, r0
    sub         r0, r1
    sub         r0, r1
    sub         r0, r1              ; r0 = pix - 3*stride
    mov         r3, 2
.loop:
    mova        p2, [r0]
    mova        p1, [r0+r1]
    mova        p0, [r0+r1*2]
    mova        q0, [r2]
    mova        q1, [r2+r1]
    mova        q2, [r2+r1*2]
    DEBLOCK_LUMA_INTER_SSE2
    mova        [r0+r1], p1
    mova        [r0+r1*2], p0
    mova        [r2], q0
    mova        [r2+r1], q1
    add         r0, mmsize
    add         r2, mmsize
    add         r4, 2
    dec         r3
    jg .loop
    REP_RET

cglobal deblock_h_luma_10, 5,7,15
    shl         r2d, 2
    shl         r3d, 2
    LOAD_AB     m12, m13, r2d, r3d
    mov         r2, r1
    add         r2, r1
    add         r2, r1              ; r2 = 3*stride
    mov         r5, r0
    add         r5, r2              ; r5 = pix + 3*stride
    mov         r6, 2
.loop:
    movu        m8, [r0-8]     ; y q2 q1 q0 p0 p1 p2 x
    movu        m0, [r0+r1-8]
    movu        m2, [r0+r1*2-8]
    movu        m9, [r5-8]
    movu        m5, [r5+r1-8]
    movu        m1, [r5+r1*2-8]
    movu        m3, [r5+r2-8]
    movu        m7, [r5+r1*4-8]

    TRANSPOSE4x4W 8, 0, 2, 9, 10
    TRANSPOSE4x4W 5, 1, 3, 7, 10

    punpckhqdq  m8, m5
    SBUTTERFLY qdq, 0, 1, 10
    SBUTTERFLY qdq, 2, 3, 10
    punpcklqdq  m9, m7

    DEBLOCK_LUMA_INTER_SSE2

    TRANSPOSE4x4W 0, 1, 2, 3, 4
    LUMA_H_STORE r5, r2
    add         r4, 2
    lea         r0, [r0+r1*8]
    lea         r5, [r5+r1*8]
    dec         r6
    jg .loop
    REP_RET
%endmacro

INIT_XMM sse2
DEBLOCK_LUMA_64
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA_64
%endif
%endif

; Writes %2 to %1: a register rename (SWAP) when %1 is an identifier,
; an actual mova when %1 is anything else (e.g. a memory operand).
%macro SWAPMOVA 2
%ifid %1
    SWAP        %1, %2
%else
    mova        %1, %2
%endif
%endmacro

; in: t0-t2: tmp registers
;     %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0
;     %8=mask1p %9=2 %10=p0' %11=p1' %12=p2'
%macro LUMA_INTRA_P012 12 ; p0..p3 in memory
%if ARCH_X86_64
    paddw       t0, %3, %2
    mova        t2, %4
    paddw       t2, %3
%else
    mova        t0, %3
    mova        t2, %4
    paddw       t0, %2
    paddw       t2, %3
%endif
    paddw       t0, %1
    paddw       t2, t2
    paddw       t0, %5
    paddw       t2, %9
    paddw       t0, %9    ; (p2 + p1 + p0 + q0 + 2)
    paddw       t2, t0    ; (2*p3 + 3*p2 + p1 + p0 + q0 + 4)

    psrlw       t2, 3
    psrlw       t1, t0, 2
    psubw       t2, %3
    psubw       t1, %2
    pand        t2, %8              ; keep the filtered value only where
    pand        t1, %8              ; mask1p is set, else restore p2 / p1
    paddw       t2, %3
    paddw       t1, %2
    SWAPMOVA    %11, t1

    psubw       t1, t0, %3
    paddw       t0, t0
    psubw       t1, %5
    psubw       t0, %3
    paddw       t1, %6
    paddw       t1, %2
    paddw       t0, %6
    psrlw       t1, 2     ; (2*p1 + p0 + q1 + 2)/4
    psrlw       t0, 3     ; (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3

    ; xor-based three-way select between the strong result (t0), the weak
    ; result (t1) and the unfiltered p0 (%1), driven by %8 and %7
    pxor        t0, t1
    pxor        t1, %1
    pand        t0, %8
    pand        t1, %7
    pxor        t0, t1
    pxor        t0, %1
    SWAPMOVA    %10, t0
    SWAPMOVA    %12, t2
%endmacro

; Sets up temporaries for the x86-32 intra filters: t0-t3 are m4-m7 and
; t4..t(%1+3) are %1 mmsize-wide stack slots starting at [rsp].
%macro LUMA_INTRA_INIT 1
    %xdefine pad %1*mmsize+((gprsize*3) % mmsize)-(stack_offset&15)
    %define t0 m4
    %define t1 m5
    %define t2 m6
    %define t3 m7
    %assign i 4
%rep %1
    CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
    %assign i i+1
%endrep
    SUB         rsp, pad
%endmacro

; in: %1-%3=tmp, %4=p2, %5=q2
;     m0=p1, m1=p0, m2=q0, m3=q1, r2d=alpha, r3d=beta
; out: %1=mask1p, %2=mask0, %3=mask1q
%macro LUMA_INTRA_INTER 5
    LOAD_AB     t0, t1, r2d, r3d
    mova        %1, t0
    LOAD_MASK   m0, m1, m2, m3, %1, t1, t0, t2, t3
%if ARCH_X86_64
    mova        %2, t0        ; mask0
    psrlw       t3, %1, 2
%else
    mova        t3, %1
    mova        %2, t0        ; mask0
    psrlw       t3, 2
%endif
    paddw       t3, [pw_2]    ; alpha/4+2
    DIFF_LT     m1, m2, t3, t2, t0 ; t2 = |p0-q0| < alpha/4+2
    pand        t2, %2
    mova        t3, %5        ; q2
    mova        %1, t2        ; mask1
    DIFF_LT     t3, m2, t1, t2, t0 ; t2 = |q2-q0| < beta
    pand        t2, %1
    mova        t3, %4        ; p2
    mova        %3, t2        ; mask1q
    DIFF_LT     t3, m1, t1, t2, t0 ; t2 = |p2-p0| < beta
    pand        t2, %1
    mova        %1, t2        ; mask1p
%endmacro

; Loads and transposes the samples around a vertical edge.
; out: m0-m3 = p1, p0, q0, q1 (as consumed by LUMA_INTRA_INTER);
;      t4=p3, t5=p2, t6=q2, t7=q3
%macro LUMA_H_INTRA_LOAD 0
%if mmsize == 8
    movu        t0, [r0-8]
    movu        t1, [r0+r1-8]
    movu        m0, [r0+r1*2-8]
    movu        m1, [r0+r4-8]
    TRANSPOSE4x4W 4, 5, 0, 1, 2
    mova        t4, t0        ; p3
    mova        t5, t1        ; p2

    movu        m2, [r0]
    movu        m3, [r0+r1]
    movu        t0, [r0+r1*2]
    movu        t1, [r0+r4]
    TRANSPOSE4x4W 2, 3, 4, 5, 6
    mova        t6, t0        ; q2
    mova        t7, t1        ; q3
%else
    movu        t0, [r0-8]
    movu        t1, [r0+r1-8]
    movu        m0, [r0+r1*2-8]
    movu        m1, [r0+r5-8]
    movu        m2, [r4-8]
    movu        m3, [r4+r1-8]
    movu        t2, [r4+r1*2-8]
    movu        t3, [r4+r5-8]
    TRANSPOSE8x8W 4, 5, 0, 1, 2, 3, 6, 7, t4, t5
    mova        t4, t0        ; p3
    mova        t5, t1        ; p2
    mova        t6, t2        ; q2
    mova        t7, t3        ; q3
%endif
%endmacro

; in: %1=q3 %2=q2' %3=q1' %4=q0' %5=p0' %6=p1' %7=p2' %8=p3 %9=tmp
; %1-%7 and %9 are register numbers; %8 is a register number or a memory
; operand. Register %1 is reused to hold %8 for the second transpose.
%macro LUMA_H_INTRA_STORE 9
%if mmsize == 8
    TRANSPOSE4x4W %1, %2, %3, %4, %9
    movq        [r0-8], m%1
    movq        [r0+r1-8], m%2
    movq        [r0+r1*2-8], m%3
    movq        [r0+r4-8], m%4
    movq        m%1, %8
    TRANSPOSE4x4W %5, %6, %7, %1, %9
    movq        [r0], m%5
    movq        [r0+r1], m%6
    movq        [r0+r1*2], m%7
    movq        [r0+r4], m%1
%else
    TRANSPOSE2x4x4W %1, %2, %3, %4, %9
    movq        [r0-8], m%1
    movq        [r0+r1-8], m%2
    movq        [r0+r1*2-8], m%3
    movq        [r0+r5-8], m%4
    movhps      [r4-8], m%1
    movhps      [r4+r1-8], m%2
    movhps      [r4+r1*2-8], m%3
    movhps      [r4+r5-8], m%4
%ifnum %8
    SWAP        %1, %8
%else
    mova        m%1, %8
%endif
    TRANSPOSE2x4x4W %5, %6, %7, %1, %9
    movq        [r0], m%5
    movq        [r0+r1], m%6
    movq        [r0+r1*2], m%7
    movq        [r0+r5], m%1
    movhps      [r4], m%5
    movhps      [r4+r1], m%6
    movhps      [r4+r1*2], m%7
    movhps      [r4+r5], m%1
%endif
%endmacro

%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha,
;                                 int beta)
;-----------------------------------------------------------------------------
%macro DEBLOCK_LUMA_INTRA_64 0
cglobal deblock_v_luma_intra_10, 4,7,16
    %define t0 m1
    %define t1 m2
    %define t2 m4
    %define p2 m8
    %define p1 m9
    %define p0 m10
    %define q0 m11
    %define q1 m12
    %define q2 m13
    %define aa m5
    %define bb m14
    lea         r4, [r1*4]
    lea         r5, [r1*3] ; 3*stride
    neg         r4
    add         r4, r0     ; pix-4*stride
    mov         r6, 2
    mova        m0, [pw_2]
    shl         r2d, 2
    shl         r3d, 2
    LOAD_AB     aa, bb, r2d, r3d
.loop:
    mova        p2, [r4+r1]
    mova        p1, [r4+2*r1]
    mova        p0, [r4+r5]
    mova        q0, [r0]
    mova        q1, [r0+r1]
    mova        q2, [r0+2*r1]

    LOAD_MASK   p1, p0, q0, q1, aa, bb, m3, t0, t1
    mova        t2, aa
    psrlw       t2, 2
    paddw       t2, m0 ; alpha/4+2
    DIFF_LT     p0, q0, t2, m6, t0 ; m6 = |p0-q0| < alpha/4+2
    DIFF_LT     p2, p0, bb, t1, t0 ; t1 = |p2-p0| < beta
    DIFF_LT     q2, q0, bb, m7, t0 ; m7 = |q2-q0| < beta
    pand        m6, m3
    pand        m7, m6              ; m7 = mask for the q side
    pand        m6, t1              ; m6 = mask for the p side
    LUMA_INTRA_P012 p0, p1, p2, [r4], q0, q1, m3, m6, m0, [r4+r5], [r4+2*r1], [r4+r1]
    LUMA_INTRA_P012 q0, q1, q2, [r0+r5], p0, p1, m3, m7, m0, [r0], [r0+r1], [r0+2*r1]
    add         r0, mmsize
    add         r4, mmsize
    dec         r6
    jg .loop
    REP_RET

;-----------------------------------------------------------------------------
; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
;                                 int beta)
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10, 4,7,16
    %define t0 m15
    %define t1 m14
    %define t2 m2
    %define q3 m5
    %define q2 m8
    %define q1 m9
    %define q0 m10
    %define p0 m11
    %define p1 m12
    %define p2 m13
    %define p3 m4
    %define spill [rsp]
    %assign pad 24-(stack_offset&15)
    SUB         rsp, pad
    lea         r4, [r1*4]
    lea         r5, [r1*3] ; 3*stride
    add         r4, r0     ; pix+4*stride
    mov         r6, 2
    mova        m0, [pw_2]
    shl         r2d, 2
    shl         r3d, 2
.loop:
    movu        q3, [r0-8]
    movu        q2, [r0+r1-8]
    movu        q1, [r0+r1*2-8]
    movu        q0, [r0+r5-8]
    movu        p0, [r4-8]
    movu        p1, [r4+r1-8]
    movu        p2, [r4+r1*2-8]
    movu        p3, [r4+r5-8]
    TRANSPOSE8x8W 5, 8, 9, 10, 11, 12, 13, 4, 1

    ; alpha/beta are re-splatted every iteration because m1/m2 are
    ; overwritten below (m1 by the psrlw/paddw, m2 is t2)
    LOAD_AB     m1, m2, r2d, r3d
    LOAD_MASK   q1, q0, p0, p1, m1, m2, m3, t0, t1
    psrlw       m1, 2
    paddw       m1, m0 ; alpha/4+2
    DIFF_LT     p0, q0, m1, m6, t0 ; m6 = |p0-q0| < alpha/4+2
    DIFF_LT     q2, q0, m2, t1, t0 ; t1 = |q2-q0| < beta
    DIFF_LT     p0, p2, m2, m7, t0 ; m7 = |p2-p0| < beta
    pand        m6, m3
    pand        m7, m6
    pand        m6, t1

    mova        spill, q3
    LUMA_INTRA_P012 q0, q1, q2, q3, p0, p1, m3, m6, m0, m5, m1, q2
    LUMA_INTRA_P012 p0, p1, p2, p3, q0, q1, m3, m7, m0, p0, m6, p2
    mova        m7, spill

    LUMA_H_INTRA_STORE 7, 8, 1, 5, 11, 6, 13, 4, 14

    lea         r0, [r0+r1*8]
    lea         r4, [r4+r1*8]
    dec         r6
    jg .loop
    ADD         rsp, pad
    RET
%endmacro

INIT_XMM sse2
DEBLOCK_LUMA_INTRA_64
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA_INTRA_64
%endif

%endif

%macro DEBLOCK_LUMA_INTRA 0
;-----------------------------------------------------------------------------
; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha,
;                                 int beta)
;-----------------------------------------------------------------------------
cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
    LUMA_INTRA_INIT 3
    lea         r4, [r1*4]
    lea         r5, [r1*3]
    neg         r4
    add         r4, r0              ; r4 = pix - 4*stride
    mov         r6, 32/mmsize
    shl         r2d, 2
    shl         r3d, 2
.loop:
    mova        m0, [r4+r1*2] ; p1
    mova        m1, [r4+r5]   ; p0
    mova        m2, [r0]      ; q0
    mova        m3, [r0+r1]   ; q1
    LUMA_INTRA_INTER t4, t5, t6, [r4+r1], [r0+r1*2]
    LUMA_INTRA_P012 m1, m0, t3, [r4], m2, m3, t5, t4, [pw_2], [r4+r5], [r4+2*r1], [r4+r1]
    mova        t3, [r0+r1*2] ; q2
    LUMA_INTRA_P012 m2, m3, t3, [r0+r5], m1, m0, t5, t6, [pw_2], [r0], [r0+r1], [r0+2*r1]
    add         r0, mmsize
    add         r4, mmsize
    dec         r6
    jg .loop
    ADD         rsp, pad
    RET

;-----------------------------------------------------------------------------
; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
;                                 int beta)
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
    LUMA_INTRA_INIT 8
%if mmsize == 8
    lea         r4, [r1*3]
    mov         r5, 32/mmsize
%else
    lea         r4, [r1*4]
    lea         r5, [r1*3] ; 3*stride
    add         r4, r0     ; pix+4*stride
    mov         r6, 32/mmsize
%endif
    shl         r2d, 2
    shl         r3d, 2
.loop:
    LUMA_H_INTRA_LOAD
    LUMA_INTRA_INTER t8, t9, t10, t5, t6

    LUMA_INTRA_P012 m1, m0, t3, t4, m2, m3, t9, t8, [pw_2], t8, t5, t11
    mova        t3, t6     ; q2
    LUMA_INTRA_P012 m2, m3, t3, t7, m1, m0, t9, t10, [pw_2], m4, t6, m5

    mova        m2, t4
    mova        m0, t11
    mova        m1, t5
    mova        m3, t8
    mova        m6, t6

    LUMA_H_INTRA_STORE 2, 0, 1, 3, 4, 6, 5, t7, 7

    lea         r0, [r0+r1*(mmsize/2)]
%if mmsize == 8
    dec         r5
%else
    lea         r4, [r4+r1*(mmsize/2)]
    dec         r6
%endif
    jg .loop
    ADD         rsp, pad
    RET
%endmacro

%if ARCH_X86_64 == 0
%if HAVE_ALIGNED_STACK == 0
INIT_MMX mmxext
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%endif
INIT_XMM sse2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%endif
%endif

; Intra chroma edge filter.
; in:  %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
;   p0' = p0 + (mask & (((2*p1 + p0 + q1 + 2) >> 2) - p0))
;   q0' = q0 + (mask & (((2*q1 + q0 + p1 + 2) >> 2) - q0))
%macro CHROMA_DEBLOCK_P0_Q0_INTRA 7
    mova            %6, [pw_2]
    paddw           %6, %3
    paddw           %6, %4                  ; %6 = p1 + q1 + 2 (shared by both sums)
    paddw           %7, %6, %2
    paddw           %6, %1
    paddw           %6, %3                  ; %6 = 2*p1 + p0 + q1 + 2
    paddw           %7, %4                  ; %7 = 2*q1 + q0 + p1 + 2
    psraw           %6, 2
    psraw           %7, 2
    psubw           %6, %1                  ; delta relative to the unfiltered p0
    psubw           %7, %2                  ; delta relative to the unfiltered q0
    pand            %6, %5                  ; keep the delta only where mask is set
    pand            %7, %5
    paddw           %1, %6
    paddw           %2, %7
%endmacro

; Load the four rows around a horizontal edge.
; %1 = register holding the address of the q0 row; r0 must address the p1 row
%macro CHROMA_V_LOAD 1
    mova            m0, [r0]                ; p1
    mova            m1, [r0+r1]             ; p0
    mova            m2, [%1]                ; q0
    mova            m3, [%1+r1]             ; q1
%endmacro

; Write back the two rows next to the edge (m1 and m2), relative to r0.
%macro CHROMA_V_STORE 0
    mova            [r0+1*r1], m1
    mova            [r0+2*r1], m2
%endmacro

; in: 8 rows of 4 words in %1..%8
; out: 4 rows of 8 words in m0..m3
; clobbers: m4, m5, m6
%macro TRANSPOSE4x8W_LOAD 8
    ; first four input rows -> m0, m2
    movq            m0, %1
    movq            m2, %2
    movq            m1, %3
    movq            m3, %4

    punpcklwd       m0, m2
    punpcklwd       m1, m3
    punpckhdq       m2, m0, m1
    punpckldq       m0, m1

    ; last four input rows -> m4, m6
    movq            m4, %5
    movq            m6, %6
    movq            m5, %7
    movq            m3, %8

    punpcklwd       m4, m6
    punpcklwd       m5, m3
    punpckhdq       m6, m4, m5
    punpckldq       m4, m5

    ; merge the two halves into the four output rows
    punpckhqdq      m1, m0, m4
    punpcklqdq      m0, m4
    punpckhqdq      m3, m2, m6
    punpcklqdq      m2, m6
%endmacro

; in: 4 rows of 8 words in m0..m3
; out: 8 rows of 4 words in %1..%8
%macro TRANSPOSE8x4W_STORE 8
    TRANSPOSE4x4W   0, 1, 2, 3, 4
    ; each register supplies two destination rows: low half, then high half
    movq            %1, m0
    movhps          %2, m0
    movq            %3, m1
    movhps          %4, m1
    movq            %5, m2
    movhps          %6, m2
    movq            %7, m3
    movhps          %8, m3
%endmacro

; Load and transpose the four columns around a vertical edge into m0..m3,
; then save m0 and m3 to %3 and %4.
; %1 = base + 3*stride
; %2 = 3*stride (unused on mmx)
; %3, %4 = place to store p1 and q1 values
%macro CHROMA_H_LOAD 4
    %if mmsize == 8
        movq        m0, [pix_q - 4]
        movq        m1, [pix_q + stride_q - 4]
        movq        m2, [pix_q + 2*stride_q - 4]
        movq        m3, [%1 - 4]
        TRANSPOSE4x4W 0, 1, 2, 3, 4
    %else
        TRANSPOSE4x8W_LOAD PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
    %endif
    mova            %3, m0
    mova            %4, m3
%endmacro

; Reload m0 and m3 from %3 and %4, transpose m0..m3 back and write the rows.
; %1 = base + 3*stride
; %2 = 3*stride (unused on mmx)
; %3, %4 = place to load p1 and q1 values
%macro CHROMA_H_STORE 4
    mova            m0, %3
    mova            m3, %4
    %if mmsize == 8
        TRANSPOSE4x4W 0, 1, 2, 3, 4
        movq        [pix_q - 4], m0
        movq        [pix_q + stride_q - 4], m1
        movq        [pix_q + 2*stride_q - 4], m2
        movq        [%1 - 4], m3
    %else
        TRANSPOSE8x4W_STORE PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
    %endif
%endmacro

; Expand the tc0 bytes at [%2] into words in %1.
; Each byte is duplicated into both halves of a word, every word is then
; repeated twice, and the result is arithmetically shifted right by 6.
%macro CHROMA_V_LOAD_TC 2
    movd            %1, [%2]
    punpcklbw       %1, %1
    punpcklwd       %1, %1
    psraw           %1, 6
%endmacro

%macro DEBLOCK_CHROMA 0
;-----------------------------------------------------------------------------
; void ff_deblock_v_chroma_10(uint16_t *pix, int stride, int alpha, int beta,
;                             int8_t *tc0)
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
    shl             r2d, 2                  ; alpha *= 4
    shl             r3d, 2                  ; beta  *= 4
    mov             r5, r0                  ; r5 = pix (q0 row)
    sub             r0, r1
    sub             r0, r1                  ; r0 = pix - 2*stride (p1 row)
    CHROMA_V_LOAD   r5
    LOAD_AB         m4, m5, r2d, r3d
    LOAD_MASK       m0, m1, m2, m3, m4, m5, m7, m6, m4
    pxor            m4, m4
    CHROMA_V_LOAD_TC m6, r4
    psubw           m6, [pw_3]
    pmaxsw          m6, m4                  ; clamp negative tc to 0
    pand            m7, m6
    DEBLOCK_P0_Q0   m1, m2, m0, m3, m7, m5, m6
    CHROMA_V_STORE
    RET

;-----------------------------------------------------------------------------
; void ff_deblock_v_chroma_intra_10(uint16_t *pix, int stride, int alpha,
;                                   int beta)
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
    shl             r2d, 2                  ; alpha *= 4
    shl             r3d, 2                  ; beta  *= 4
    mov             r4, r0                  ; r4 = pix (q0 row)
    sub             r0, r1
    sub             r0, r1                  ; r0 = pix - 2*stride (p1 row)
    CHROMA_V_LOAD   r4
    LOAD_AB         m4, m5, r2d, r3d
    LOAD_MASK       m0, m1, m2, m3, m4, m5, m7, m6, m4
    CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6
    CHROMA_V_STORE
    RET

;-----------------------------------------------------------------------------
; void ff_deblock_h_chroma_10(uint16_t *pix, int stride, int alpha, int beta,
;                             int8_t *tc0)
;
; Stack: [rsp] and [rsp + mmsize] hold the p1/q1 vectors between
; CHROMA_H_LOAD and CHROMA_H_STORE.
;-----------------------------------------------------------------------------
cglobal deblock_h_chroma_10, 5, 7, 8, 0-2*mmsize, pix_, stride_, alpha_, beta_, tc0_
    lea             r6, [3*stride_q]        ; r6 = 3*stride
    lea             r5, [pix_q + r6]        ; r5 = pix + 3*stride
    shl             alpha_d, 2              ; alpha *= 4
    shl             beta_d, 2               ; beta  *= 4

    CHROMA_H_LOAD   r5, r6, [rsp], [rsp + mmsize]
    LOAD_AB         m4, m5, alpha_d, beta_d
    LOAD_MASK       m0, m1, m2, m3, m4, m5, m7, m6, m4
    pxor            m4, m4
    CHROMA_V_LOAD_TC m6, tc0_q
    psubw           m6, [pw_3]
    pmaxsw          m6, m4                  ; clamp negative tc to 0
    pand            m7, m6
    DEBLOCK_P0_Q0   m1, m2, m0, m3, m7, m5, m6
    CHROMA_H_STORE  r5, r6, [rsp], [rsp + mmsize]

    RET

;-----------------------------------------------------------------------------
; void ff_deblock_h_chroma422_10(uint16_t *pix, int stride, int alpha, int beta,
;                                int8_t *tc0)
;
; Stack: [rsp] holds the four tc0 values expanded to words;
; [rsp + 1*mmsize] and [rsp + 2*mmsize] hold the p1/q1 vectors.
; Each loop iteration filters mmsize/2 rows and consumes mmsize/4 bytes of
; the expanded tc words; r4 counts up from -8 and the loop ends at zero.
;-----------------------------------------------------------------------------
cglobal deblock_h_chroma422_10, 5, 7, 8, 0-3*mmsize, pix_, stride_, alpha_, beta_, tc0_
    ; widen tc0 once, ahead of the loop
    movd            m0, [tc0_q]
    punpcklbw       m0, m0
    psraw           m0, 6
    movq            [rsp], m0

    lea             r6, [3*stride_q]        ; r6 = 3*stride
    lea             r5, [pix_q + r6]        ; r5 = pix + 3*stride
    shl             alpha_d, 2              ; alpha *= 4
    shl             beta_d, 2               ; beta  *= 4

    mov             r4, -8
.loop:
    CHROMA_H_LOAD   r5, r6, [rsp + 1*mmsize], [rsp + 2*mmsize]
    LOAD_AB         m4, m5, alpha_d, beta_d
    LOAD_MASK       m0, m1, m2, m3, m4, m5, m7, m6, m4
    pxor            m4, m4
    movd            m6, [rsp + r4 + 8]      ; next pair of tc words
    punpcklwd       m6, m6
    punpcklwd       m6, m6                  ; each tc word repeated four times
    psubw           m6, [pw_3]
    pmaxsw          m6, m4                  ; clamp negative tc to 0
    pand            m7, m6
    DEBLOCK_P0_Q0   m1, m2, m0, m3, m7, m5, m6
    CHROMA_H_STORE  r5, r6, [rsp + 1*mmsize], [rsp + 2*mmsize]

    lea             pix_q, [pix_q + (mmsize/2)*stride_q]
    lea             r5, [r5 + (mmsize/2)*stride_q]
    add             r4, (mmsize/4)
    jl              .loop
    RET

%endmacro

INIT_XMM sse2
DEBLOCK_CHROMA
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_CHROMA
%endif