1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include <stdint.h>
22cabdff1aSopenharmony_ci
23cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h"
24cabdff1aSopenharmony_ci#include "libswscale/swscale_internal.h"
25cabdff1aSopenharmony_ci
/*
 * Drop any earlier definitions of the store/prefetch helper macros and
 * (re)define the MOVNTQ family on top of the `movntq` instruction.
 *
 * REAL_MOVNTQ(a,b) stringifies its arguments into the asm text
 * "movntq a, b"; MOVNTQ(a,b) forwards to it so that macro arguments are
 * expanded by the preprocessor before they are stringified. MOVNTQ2 is
 * just the bare mnemonic. PREFETCH is only undefined here; no replacement
 * is defined in this part of the file.
 */
26cabdff1aSopenharmony_ci#undef REAL_MOVNTQ
27cabdff1aSopenharmony_ci#undef MOVNTQ
28cabdff1aSopenharmony_ci#undef MOVNTQ2
29cabdff1aSopenharmony_ci#undef PREFETCH
30cabdff1aSopenharmony_ci
31cabdff1aSopenharmony_ci
32cabdff1aSopenharmony_ci#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
33cabdff1aSopenharmony_ci#define MOVNTQ2 "movntq "
34cabdff1aSopenharmony_ci#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
35cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKEDX_UV: opens the `__asm__ volatile(` statement and emits
 * the chroma part of the vertical filter.
 *
 * The statement is deliberately left open: the macros used after this one
 * append more asm text, and YSCALEYUV2PACKEDX_END supplies the operands,
 * the clobber list and the closing parenthesis. With those operands, %0 is
 * &c->redDither and %6 is the uv_off memory operand.
 *
 * Flow:
 *   - FF_REG_a is zeroed. Label 1 is the target of the `jb 1b` emitted by
 *     the WRITE... macros, i.e. the start of the per-8-pixel loop.
 *   - FF_REG_d walks a list of 16-byte entries starting at
 *     CHR_MMX_FILTER_OFFSET(%0): a source pointer at offset 0 and the
 *     filter coefficient quadword at offset 8. A zero source pointer ends
 *     the list (test / jnz 2b).
 *   - mm3 (U) and mm4 (V) start from the quadword at VROUNDER_OFFSET(%0)
 *     and accumulate pmulhw(source, coefficient) for every entry. V is
 *     read from the U pointer advanced by %6.
 *
 * Overwrites mm0, mm2, mm3, mm4, mm5, FF_REG_d and FF_REG_S.
 */
36cabdff1aSopenharmony_ci#define YSCALEYUV2PACKEDX_UV \
37cabdff1aSopenharmony_ci    __asm__ volatile(\
38cabdff1aSopenharmony_ci        "xor                %%"FF_REG_a", %%"FF_REG_a"  \n\t"\
39cabdff1aSopenharmony_ci        ".p2align                      4                \n\t"\
40cabdff1aSopenharmony_ci        "nop                                            \n\t"\
41cabdff1aSopenharmony_ci        "1:                                             \n\t"\
42cabdff1aSopenharmony_ci        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d"  \n\t"\
43cabdff1aSopenharmony_ci        "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
44cabdff1aSopenharmony_ci        "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
45cabdff1aSopenharmony_ci        "movq                      %%mm3, %%mm4         \n\t"\
46cabdff1aSopenharmony_ci        ".p2align                      4                \n\t"\
47cabdff1aSopenharmony_ci        "2:                                             \n\t"\
48cabdff1aSopenharmony_ci        "movq            8(%%"FF_REG_d"), %%mm0         \n\t" /* filterCoeff */\
49cabdff1aSopenharmony_ci        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm2      \n\t" /* UsrcData */\
50cabdff1aSopenharmony_ci        "add                          %6, %%"FF_REG_S"  \n\t" \
51cabdff1aSopenharmony_ci        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm5      \n\t" /* VsrcData */\
52cabdff1aSopenharmony_ci        "add                         $16, %%"FF_REG_d"  \n\t"\
53cabdff1aSopenharmony_ci        "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
54cabdff1aSopenharmony_ci        "pmulhw                    %%mm0, %%mm2         \n\t"\
55cabdff1aSopenharmony_ci        "pmulhw                    %%mm0, %%mm5         \n\t"\
56cabdff1aSopenharmony_ci        "paddw                     %%mm2, %%mm3         \n\t"\
57cabdff1aSopenharmony_ci        "paddw                     %%mm5, %%mm4         \n\t"\
58cabdff1aSopenharmony_ci        "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
59cabdff1aSopenharmony_ci        " jnz                         2b                \n\t"\
60cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2): asm text
 * that vertically filters two adjacent quadwords of a 16-bit plane. In
 * this file it is instantiated for luma (LUM_MMX_FILTER_OFFSET) and alpha
 * (ALP_MMX_FILTER_OFFSET).
 *
 *   offset      string with the displacement of the filter list from %0
 *   coeff       MMX register that receives each entry's coefficient
 *   src1, src2  MMX scratch registers for the two source quadwords
 *   dst1, dst2  MMX accumulators; both start from VROUNDER_OFFSET(%0)
 *
 * The list is walked exactly as in YSCALEYUV2PACKEDX_UV: source pointer at
 * offset 0, coefficient at offset 8, 16 bytes per entry, and a zero
 * pointer terminates. Sources are addressed as (ptr + 2*FF_REG_a) and
 * 8(ptr + 2*FF_REG_a). Overwrites FF_REG_d and FF_REG_S.
 *
 * Emits string literals only, so it must be used inside an asm statement
 * that has already been opened. It reuses the numeric local label 2; the
 * `2b` reference binds to the nearest preceding definition.
 */
61cabdff1aSopenharmony_ci#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
62cabdff1aSopenharmony_ci    "lea                "offset"(%0), %%"FF_REG_d"  \n\t"\
63cabdff1aSopenharmony_ci    "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
64cabdff1aSopenharmony_ci    "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\
65cabdff1aSopenharmony_ci    "movq                    "#dst1", "#dst2"       \n\t"\
66cabdff1aSopenharmony_ci    ".p2align                      4                \n\t"\
67cabdff1aSopenharmony_ci    "2:                                             \n\t"\
68cabdff1aSopenharmony_ci    "movq            8(%%"FF_REG_d"), "#coeff"      \n\t" /* filterCoeff */\
69cabdff1aSopenharmony_ci    "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
70cabdff1aSopenharmony_ci    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
71cabdff1aSopenharmony_ci    "add                         $16, %%"FF_REG_d"  \n\t"\
72cabdff1aSopenharmony_ci    "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
73cabdff1aSopenharmony_ci    "pmulhw                 "#coeff", "#src1"       \n\t"\
74cabdff1aSopenharmony_ci    "pmulhw                 "#coeff", "#src2"       \n\t"\
75cabdff1aSopenharmony_ci    "paddw                   "#src1", "#dst1"       \n\t"\
76cabdff1aSopenharmony_ci    "paddw                   "#src2", "#dst2"       \n\t"\
77cabdff1aSopenharmony_ci    "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
78cabdff1aSopenharmony_ci    " jnz                         2b                \n\t"\
79cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKEDX: chroma pass followed by the luma pass.
 * Afterwards mm3 / mm4 hold the filtered U / V words and mm1 / mm7 the two
 * filtered luma quadwords (Y1 / Y2); this is the register layout that
 * YSCALEYUV2RGBX reads. mm0, mm2 and mm5 are used as scratch.
 */
80cabdff1aSopenharmony_ci#define YSCALEYUV2PACKEDX \
81cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_UV \
82cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
83cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKEDX_END: operand list, clobber list and terminator for the
 * asm statement opened by YSCALEYUV2PACKEDX_UV or
 * YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 * There are no output operands. Inputs, in order:
 *   %0     &c->redDither (register): base address for every offset,
 *          coefficient and temporary-slot displacement in the asm text
 *   %1-%3  dummy (memory): not referenced by the asm text visible in this
 *          part of the file; presumably kept so that the following
 *          operands stay numbered 4..6 - TODO confirm
 *   %4     dest (register)
 *   %5     dstW_reg (memory)
 *   %6     uv_off (memory)
 * NAMED_CONSTRAINTS_ADD(bF8,bFC) is defined outside this part of the file.
 * FF_REG_a, FF_REG_d and FF_REG_S are declared as clobbered.
 *
 * Requires `c`, `dummy`, `dest`, `dstW_reg` and `uv_off` to be in scope at
 * the point of use.
 *
 * NOTE(review): the asm stores through %4 and into slots addressed from %0,
 * yet no "memory" clobber is listed - confirm that this is intentional.
 */
84cabdff1aSopenharmony_ci#define YSCALEYUV2PACKEDX_END                     \
85cabdff1aSopenharmony_ci        :: "r" (&c->redDither),                   \
86cabdff1aSopenharmony_ci            "m" (dummy), "m" (dummy), "m" (dummy),\
87cabdff1aSopenharmony_ci            "r" (dest), "m" (dstW_reg), "m"(uv_off) \
88cabdff1aSopenharmony_ci            NAMED_CONSTRAINTS_ADD(bF8,bFC) \
89cabdff1aSopenharmony_ci        : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S            \
90cabdff1aSopenharmony_ci    );
91cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV: chroma pass that accumulates in 32 bits.
 * Like YSCALEYUV2PACKEDX_UV it opens the `__asm__ volatile(` statement that
 * YSCALEYUV2PACKEDX_END later closes, zeroes FF_REG_a and defines label 1
 * (the target of the `jb 1b` in the WRITE... macros).
 *
 * Each loop iteration consumes one filter entry covering two source lines:
 * a pointer at offset 0, a second pointer at APCK_PTR2 and a coefficient
 * quadword at APCK_COEF. Entries are APCK_SIZE bytes apart; the loop ends
 * when the first pointer of the next entry is zero. Words from the two
 * lines are interleaved (punpcklwd / punpckhwd) and fed to pmaddwd, with
 * the 32-bit sums kept in mm4 / mm5 for U and mm6 / mm7 for V. V samples
 * are read from the U pointers advanced by %6.
 *
 * The `test` that decides loop continuation is issued several instructions
 * before its `jnz`; only MMX instructions sit in between.
 *
 * After the loop the sums are shifted right by 16, packed back to signed
 * words, offset by the quadword at VROUNDER_OFFSET(%0) and stored to
 * U_TEMP(%0) and V_TEMP(%0). All of mm0-mm7, FF_REG_d and FF_REG_S are
 * overwritten.
 */
92cabdff1aSopenharmony_ci#define YSCALEYUV2PACKEDX_ACCURATE_UV \
93cabdff1aSopenharmony_ci    __asm__ volatile(\
94cabdff1aSopenharmony_ci        "xor %%"FF_REG_a", %%"FF_REG_a"                 \n\t"\
95cabdff1aSopenharmony_ci        ".p2align                      4                \n\t"\
96cabdff1aSopenharmony_ci        "nop                                            \n\t"\
97cabdff1aSopenharmony_ci        "1:                                             \n\t"\
98cabdff1aSopenharmony_ci        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d"  \n\t"\
99cabdff1aSopenharmony_ci        "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
100cabdff1aSopenharmony_ci        "pxor                      %%mm4, %%mm4         \n\t"\
101cabdff1aSopenharmony_ci        "pxor                      %%mm5, %%mm5         \n\t"\
102cabdff1aSopenharmony_ci        "pxor                      %%mm6, %%mm6         \n\t"\
103cabdff1aSopenharmony_ci        "pxor                      %%mm7, %%mm7         \n\t"\
104cabdff1aSopenharmony_ci        ".p2align                      4                \n\t"\
105cabdff1aSopenharmony_ci        "2:                                             \n\t"\
106cabdff1aSopenharmony_ci        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm0      \n\t" /* UsrcData */\
107cabdff1aSopenharmony_ci        "add                          %6, %%"FF_REG_S"  \n\t" \
108cabdff1aSopenharmony_ci        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm2      \n\t" /* VsrcData */\
109cabdff1aSopenharmony_ci        "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
110cabdff1aSopenharmony_ci        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm1      \n\t" /* UsrcData */\
111cabdff1aSopenharmony_ci        "movq                      %%mm0, %%mm3         \n\t"\
112cabdff1aSopenharmony_ci        "punpcklwd                 %%mm1, %%mm0         \n\t"\
113cabdff1aSopenharmony_ci        "punpckhwd                 %%mm1, %%mm3         \n\t"\
114cabdff1aSopenharmony_ci        "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1      \n\t" /* filterCoeff */\
115cabdff1aSopenharmony_ci        "pmaddwd                   %%mm1, %%mm0         \n\t"\
116cabdff1aSopenharmony_ci        "pmaddwd                   %%mm1, %%mm3         \n\t"\
117cabdff1aSopenharmony_ci        "paddd                     %%mm0, %%mm4         \n\t"\
118cabdff1aSopenharmony_ci        "paddd                     %%mm3, %%mm5         \n\t"\
119cabdff1aSopenharmony_ci        "add                          %6, %%"FF_REG_S"  \n\t" \
120cabdff1aSopenharmony_ci        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm3      \n\t" /* VsrcData */\
121cabdff1aSopenharmony_ci        "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
122cabdff1aSopenharmony_ci        "add           $"STR(APCK_SIZE)", %%"FF_REG_d"  \n\t"\
123cabdff1aSopenharmony_ci        "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
124cabdff1aSopenharmony_ci        "movq                      %%mm2, %%mm0         \n\t"\
125cabdff1aSopenharmony_ci        "punpcklwd                 %%mm3, %%mm2         \n\t"\
126cabdff1aSopenharmony_ci        "punpckhwd                 %%mm3, %%mm0         \n\t"\
127cabdff1aSopenharmony_ci        "pmaddwd                   %%mm1, %%mm2         \n\t"\
128cabdff1aSopenharmony_ci        "pmaddwd                   %%mm1, %%mm0         \n\t"\
129cabdff1aSopenharmony_ci        "paddd                     %%mm2, %%mm6         \n\t"\
130cabdff1aSopenharmony_ci        "paddd                     %%mm0, %%mm7         \n\t"\
131cabdff1aSopenharmony_ci        " jnz                         2b                \n\t"\
132cabdff1aSopenharmony_ci        "psrad                       $16, %%mm4         \n\t"\
133cabdff1aSopenharmony_ci        "psrad                       $16, %%mm5         \n\t"\
134cabdff1aSopenharmony_ci        "psrad                       $16, %%mm6         \n\t"\
135cabdff1aSopenharmony_ci        "psrad                       $16, %%mm7         \n\t"\
136cabdff1aSopenharmony_ci        "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
137cabdff1aSopenharmony_ci        "packssdw                  %%mm5, %%mm4         \n\t"\
138cabdff1aSopenharmony_ci        "packssdw                  %%mm7, %%mm6         \n\t"\
139cabdff1aSopenharmony_ci        "paddw                     %%mm0, %%mm4         \n\t"\
140cabdff1aSopenharmony_ci        "paddw                     %%mm0, %%mm6         \n\t"\
141cabdff1aSopenharmony_ci        "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
142cabdff1aSopenharmony_ci        "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
143cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset): 32-bit-accumulating vertical
 * filter for two adjacent quadwords of a 16-bit plane, driven by the filter
 * list at offset(%0). Emits string literals only, so it must be used inside
 * an asm statement that is already open.
 *
 * Uses the same two-lines-per-entry layout as YSCALEYUV2PACKEDX_ACCURATE_UV
 * (APCK_PTR2 / APCK_COEF / APCK_SIZE, zero pointer terminates), with the
 * sources addressed as (ptr + 2*FF_REG_a) and 8(ptr + 2*FF_REG_a). The
 * 32-bit sums live in mm1 / mm5 for the first quadword and mm7 / mm6 for
 * the second.
 *
 * On exit mm1 and mm7 hold the packed 16-bit results plus the quadword at
 * VROUNDER_OFFSET(%0), and mm3 / mm4 are reloaded from U_TEMP(%0) /
 * V_TEMP(%0). Every MMX register is overwritten, so a caller has to spill
 * whatever it needs to keep across this macro. FF_REG_d and FF_REG_S are
 * overwritten as well.
 */
144cabdff1aSopenharmony_ci#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
145cabdff1aSopenharmony_ci    "lea                "offset"(%0), %%"FF_REG_d"      \n\t"\
146cabdff1aSopenharmony_ci    "mov                 (%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\
147cabdff1aSopenharmony_ci    "pxor                      %%mm1, %%mm1         \n\t"\
148cabdff1aSopenharmony_ci    "pxor                      %%mm5, %%mm5         \n\t"\
149cabdff1aSopenharmony_ci    "pxor                      %%mm7, %%mm7         \n\t"\
150cabdff1aSopenharmony_ci    "pxor                      %%mm6, %%mm6         \n\t"\
151cabdff1aSopenharmony_ci    ".p2align                      4                \n\t"\
152cabdff1aSopenharmony_ci    "2:                                             \n\t"\
153cabdff1aSopenharmony_ci    "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0       \n\t" /* Y1srcData */\
154cabdff1aSopenharmony_ci    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2       \n\t" /* Y2srcData */\
155cabdff1aSopenharmony_ci    "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\
156cabdff1aSopenharmony_ci    "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4       \n\t" /* Y1srcData */\
157cabdff1aSopenharmony_ci    "movq                      %%mm0, %%mm3         \n\t"\
158cabdff1aSopenharmony_ci    "punpcklwd                 %%mm4, %%mm0         \n\t"\
159cabdff1aSopenharmony_ci    "punpckhwd                 %%mm4, %%mm3         \n\t"\
160cabdff1aSopenharmony_ci    "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4     \n\t" /* filterCoeff */\
161cabdff1aSopenharmony_ci    "pmaddwd                   %%mm4, %%mm0         \n\t"\
162cabdff1aSopenharmony_ci    "pmaddwd                   %%mm4, %%mm3         \n\t"\
163cabdff1aSopenharmony_ci    "paddd                     %%mm0, %%mm1         \n\t"\
164cabdff1aSopenharmony_ci    "paddd                     %%mm3, %%mm5         \n\t"\
165cabdff1aSopenharmony_ci    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3   \n\t" /* Y2srcData */\
166cabdff1aSopenharmony_ci    "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
167cabdff1aSopenharmony_ci    "add           $"STR(APCK_SIZE)", %%"FF_REG_d"  \n\t"\
168cabdff1aSopenharmony_ci    "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
169cabdff1aSopenharmony_ci    "movq                      %%mm2, %%mm0         \n\t"\
170cabdff1aSopenharmony_ci    "punpcklwd                 %%mm3, %%mm2         \n\t"\
171cabdff1aSopenharmony_ci    "punpckhwd                 %%mm3, %%mm0         \n\t"\
172cabdff1aSopenharmony_ci    "pmaddwd                   %%mm4, %%mm2         \n\t"\
173cabdff1aSopenharmony_ci    "pmaddwd                   %%mm4, %%mm0         \n\t"\
174cabdff1aSopenharmony_ci    "paddd                     %%mm2, %%mm7         \n\t"\
175cabdff1aSopenharmony_ci    "paddd                     %%mm0, %%mm6         \n\t"\
176cabdff1aSopenharmony_ci    " jnz                         2b                \n\t"\
177cabdff1aSopenharmony_ci    "psrad                       $16, %%mm1         \n\t"\
178cabdff1aSopenharmony_ci    "psrad                       $16, %%mm5         \n\t"\
179cabdff1aSopenharmony_ci    "psrad                       $16, %%mm7         \n\t"\
180cabdff1aSopenharmony_ci    "psrad                       $16, %%mm6         \n\t"\
181cabdff1aSopenharmony_ci    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
182cabdff1aSopenharmony_ci    "packssdw                  %%mm5, %%mm1         \n\t"\
183cabdff1aSopenharmony_ci    "packssdw                  %%mm6, %%mm7         \n\t"\
184cabdff1aSopenharmony_ci    "paddw                     %%mm0, %%mm1         \n\t"\
185cabdff1aSopenharmony_ci    "paddw                     %%mm0, %%mm7         \n\t"\
186cabdff1aSopenharmony_ci    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
187cabdff1aSopenharmony_ci    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
188cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKEDX_ACCURATE: accurate chroma pass followed by the accurate
 * luma pass. On completion mm1 / mm7 hold Y1 / Y2 and mm3 / mm4 hold U / V
 * (reloaded from the U_TEMP / V_TEMP slots), which is the register layout
 * that YSCALEYUV2RGBX reads.
 */
189cabdff1aSopenharmony_ci#define YSCALEYUV2PACKEDX_ACCURATE \
190cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_ACCURATE_UV \
191cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
192cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2RGBX: asm text that turns the filtered samples of 8 pixels into
 * three registers of packed 8-bit colour components.
 *
 * In:      mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (16-bit words)
 * Out:     mm2, mm4 and mm5, each holding 8 unsigned bytes produced by
 *          packuswb (unsigned saturation). The writers in this file treat
 *          them as B, G and R respectively.
 * Scratch: mm0, mm3, mm6. mm1 and mm7 are left holding the scaled luma.
 *
 * Offsets and coefficients are read relative to %0 (U_OFFSET, V_OFFSET,
 * Y_OFFSET, UG_COEFF, VG_COEFF, UB_COEFF, VR_COEFF, Y_COEFF). Every chroma
 * word is duplicated by unpacking a register with itself, so one chroma
 * sample is combined with two neighbouring luma samples.
 */
193cabdff1aSopenharmony_ci#define YSCALEYUV2RGBX \
194cabdff1aSopenharmony_ci    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
195cabdff1aSopenharmony_ci    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
196cabdff1aSopenharmony_ci    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
197cabdff1aSopenharmony_ci    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
198cabdff1aSopenharmony_ci    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
199cabdff1aSopenharmony_ci    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
200cabdff1aSopenharmony_ci    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
201cabdff1aSopenharmony_ci    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
202cabdff1aSopenharmony_ci    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
203cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
204cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
205cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
206cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
207cabdff1aSopenharmony_ci    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
208cabdff1aSopenharmony_ci    "paddw           %%mm3, %%mm4       \n\t"\
209cabdff1aSopenharmony_ci    "movq            %%mm2, %%mm0       \n\t"\
210cabdff1aSopenharmony_ci    "movq            %%mm5, %%mm6       \n\t"\
211cabdff1aSopenharmony_ci    "movq            %%mm4, %%mm3       \n\t"\
212cabdff1aSopenharmony_ci    "punpcklwd       %%mm2, %%mm2       \n\t"\
213cabdff1aSopenharmony_ci    "punpcklwd       %%mm5, %%mm5       \n\t"\
214cabdff1aSopenharmony_ci    "punpcklwd       %%mm4, %%mm4       \n\t"\
215cabdff1aSopenharmony_ci    "paddw           %%mm1, %%mm2       \n\t"\
216cabdff1aSopenharmony_ci    "paddw           %%mm1, %%mm5       \n\t"\
217cabdff1aSopenharmony_ci    "paddw           %%mm1, %%mm4       \n\t"\
218cabdff1aSopenharmony_ci    "punpckhwd       %%mm0, %%mm0       \n\t"\
219cabdff1aSopenharmony_ci    "punpckhwd       %%mm6, %%mm6       \n\t"\
220cabdff1aSopenharmony_ci    "punpckhwd       %%mm3, %%mm3       \n\t"\
221cabdff1aSopenharmony_ci    "paddw           %%mm7, %%mm0       \n\t"\
222cabdff1aSopenharmony_ci    "paddw           %%mm7, %%mm6       \n\t"\
223cabdff1aSopenharmony_ci    "paddw           %%mm7, %%mm3       \n\t"\
224cabdff1aSopenharmony_ci    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
225cabdff1aSopenharmony_ci    "packuswb        %%mm0, %%mm2       \n\t"\
226cabdff1aSopenharmony_ci    "packuswb        %%mm6, %%mm5       \n\t"\
227cabdff1aSopenharmony_ci    "packuswb        %%mm3, %%mm4       \n\t"\
228cabdff1aSopenharmony_ci
/*
 * REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t): interleave
 * four registers of packed bytes into 8 four-byte pixels, store them and
 * close the per-8-pixel loop.
 *
 *   dst         base register of the output row (pasted into the address)
 *   dstw        string naming the operand that index is compared against
 *   index       pixel index register; advanced by 8 here
 *   b, g, r, a  MMX registers with 8 bytes each; every pixel is laid out in
 *               memory as the bytes b, g, r, a. b and r are modified.
 *   q0, q2, q3, t  MMX scratch registers
 *
 * Stores 32 bytes at dst + 4*index with MOVNTQ, then jumps back to label 1
 * while index is below dstw (`jb`, i.e. an unsigned comparison). The loop
 * body has therefore already run once before the width is first checked.
 *
 * WRITEBGR32 is the name to use: the extra macro level lets the
 * preprocessor expand the arguments before REAL_WRITEBGR32 stringifies
 * them.
 */
229cabdff1aSopenharmony_ci#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
230cabdff1aSopenharmony_ci    "movq       "#b", "#q2"     \n\t" /* B */\
231cabdff1aSopenharmony_ci    "movq       "#r", "#t"      \n\t" /* R */\
232cabdff1aSopenharmony_ci    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
233cabdff1aSopenharmony_ci    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
234cabdff1aSopenharmony_ci    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
235cabdff1aSopenharmony_ci    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
236cabdff1aSopenharmony_ci    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
237cabdff1aSopenharmony_ci    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
238cabdff1aSopenharmony_ci    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
239cabdff1aSopenharmony_ci    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
240cabdff1aSopenharmony_ci    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
241cabdff1aSopenharmony_ci    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
242cabdff1aSopenharmony_ci\
243cabdff1aSopenharmony_ci    MOVNTQ(   q0,   (dst, index, 4))\
244cabdff1aSopenharmony_ci    MOVNTQ(    b,  8(dst, index, 4))\
245cabdff1aSopenharmony_ci    MOVNTQ(   q2, 16(dst, index, 4))\
246cabdff1aSopenharmony_ci    MOVNTQ(   q3, 24(dst, index, 4))\
247cabdff1aSopenharmony_ci\
248cabdff1aSopenharmony_ci    "add      $8, "#index"      \n\t"\
249cabdff1aSopenharmony_ci    "cmp  "dstw", "#index"      \n\t"\
250cabdff1aSopenharmony_ci    " jb      1b                \n\t"
251cabdff1aSopenharmony_ci#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
252cabdff1aSopenharmony_ci
/**
 * Vertically filter one output row with the 32-bit-accumulating MMX path
 * and store it as 4-byte pixels whose bytes are ordered B, G, R, A in
 * memory. Eight pixels are produced per loop iteration.
 *
 * Only c, dest and dstW are referenced in the body. The filter and source
 * parameters belong to the common signature, but the asm reads its data
 * through displacements from &c->redDither instead.
 *
 * @param c     scaler context; needAlpha selects the branch, uv_offx2 is
 *              the byte offset added to a U pointer to reach V
 * @param dest  output row
 * @param dstW  output width in pixels, compared against the pixel index
 *              after each group of 8 has been written
 */
253cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
254cabdff1aSopenharmony_ci                                   const int16_t **lumSrc, int lumFilterSize,
255cabdff1aSopenharmony_ci                                   const int16_t *chrFilter, const int16_t **chrUSrc,
256cabdff1aSopenharmony_ci                                   const int16_t **chrVSrc,
257cabdff1aSopenharmony_ci                                   int chrFilterSize, const int16_t **alpSrc,
258cabdff1aSopenharmony_ci                                   uint8_t *dest, int dstW, int dstY)
259cabdff1aSopenharmony_ci{
    /* Locals named by YSCALEYUV2PACKEDX_END as asm operands. */
260cabdff1aSopenharmony_ci    x86_reg dummy=0;
261cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
262cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
263cabdff1aSopenharmony_ci
264cabdff1aSopenharmony_ci    if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
265cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_ACCURATE
266cabdff1aSopenharmony_ci        YSCALEYUV2RGBX
        /* Spill B (mm2), G (mm4) and R (mm5): the alpha filter below
         * overwrites every MMX register. */
267cabdff1aSopenharmony_ci        "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
268cabdff1aSopenharmony_ci        "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
269cabdff1aSopenharmony_ci        "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
270cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
        /* The macro above ends by reloading mm3 from U_TEMP (B) and mm4
         * from V_TEMP (G); bring R back into mm5 as well. */
271cabdff1aSopenharmony_ci        "movq               "Y_TEMP"(%0), %%mm5         \n\t"
        /* Shift the filtered alpha words right by 3 and pack them to
         * 8 unsigned bytes in mm1. */
272cabdff1aSopenharmony_ci        "psraw                        $3, %%mm1         \n\t"
273cabdff1aSopenharmony_ci        "psraw                        $3, %%mm7         \n\t"
274cabdff1aSopenharmony_ci        "packuswb                  %%mm7, %%mm1         \n\t"
275cabdff1aSopenharmony_ci        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
276cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_END
277cabdff1aSopenharmony_ci    } else {
278cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_ACCURATE
279cabdff1aSopenharmony_ci        YSCALEYUV2RGBX
        /* No alpha input: set every bit of mm7 so each pixel gets an
         * alpha byte of 0xFF. */
280cabdff1aSopenharmony_ci        "pcmpeqd %%mm7, %%mm7 \n\t"
281cabdff1aSopenharmony_ci        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
282cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_END
283cabdff1aSopenharmony_ci    }
284cabdff1aSopenharmony_ci}
285cabdff1aSopenharmony_ci
/**
 * Vertically filter one output row with the pmulhw-based (16-bit
 * accumulating) MMX path and store it as 4-byte pixels whose bytes are
 * ordered B, G, R, A in memory. Eight pixels are produced per loop
 * iteration.
 *
 * Only c, dest and dstW are referenced in the body. The filter and source
 * parameters belong to the common signature, but the asm reads its data
 * through displacements from &c->redDither instead.
 *
 * @param c     scaler context; needAlpha selects the branch, uv_offx2 is
 *              the byte offset added to a U pointer to reach V
 * @param dest  output row
 * @param dstW  output width in pixels, compared against the pixel index
 *              after each group of 8 has been written
 */
286cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
287cabdff1aSopenharmony_ci                                const int16_t **lumSrc, int lumFilterSize,
288cabdff1aSopenharmony_ci                                const int16_t *chrFilter, const int16_t **chrUSrc,
289cabdff1aSopenharmony_ci                                const int16_t **chrVSrc,
290cabdff1aSopenharmony_ci                                int chrFilterSize, const int16_t **alpSrc,
291cabdff1aSopenharmony_ci                                uint8_t *dest, int dstW, int dstY)
292cabdff1aSopenharmony_ci{
    /* Locals named by YSCALEYUV2PACKEDX_END as asm operands. */
293cabdff1aSopenharmony_ci    x86_reg dummy=0;
294cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
295cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
296cabdff1aSopenharmony_ci
297cabdff1aSopenharmony_ci    if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
298cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX
299cabdff1aSopenharmony_ci        YSCALEYUV2RGBX
        /* The alpha filter is given mm0, mm3, mm6, mm1 and mm7 only, so
         * B/G/R in mm2/mm4/mm5 survive without being spilled. */
300cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
        /* Shift the filtered alpha words right by 3 and pack them to
         * 8 unsigned bytes in mm1. */
301cabdff1aSopenharmony_ci        "psraw                        $3, %%mm1         \n\t"
302cabdff1aSopenharmony_ci        "psraw                        $3, %%mm7         \n\t"
303cabdff1aSopenharmony_ci        "packuswb                  %%mm7, %%mm1         \n\t"
304cabdff1aSopenharmony_ci        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
305cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_END
306cabdff1aSopenharmony_ci    } else {
307cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX
308cabdff1aSopenharmony_ci        YSCALEYUV2RGBX
        /* No alpha input: set every bit of mm7 so each pixel gets an
         * alpha byte of 0xFF. */
309cabdff1aSopenharmony_ci        "pcmpeqd %%mm7, %%mm7 \n\t"
310cabdff1aSopenharmony_ci        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
311cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_END
312cabdff1aSopenharmony_ci    }
313cabdff1aSopenharmony_ci}
314cabdff1aSopenharmony_ci
/**
 * Same pipeline as yuv2rgb32_X, but with the first and third colour
 * registers swapped when calling WRITEBGR32 (mm5 is passed as `b` and mm2
 * as `r`), so each 4-byte pixel is stored with the bytes ordered
 * R, G, B, A in memory. Eight pixels are produced per loop iteration.
 *
 * Only c, dest and dstW are referenced in the body. The filter and source
 * parameters belong to the common signature, but the asm reads its data
 * through displacements from &c->redDither instead.
 *
 * @param c     scaler context; needAlpha selects the branch, uv_offx2 is
 *              the byte offset added to a U pointer to reach V
 * @param dest  output row
 * @param dstW  output width in pixels, compared against the pixel index
 *              after each group of 8 has been written
 */
315cabdff1aSopenharmony_cistatic void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
316cabdff1aSopenharmony_ci                                const int16_t **lumSrc, int lumFilterSize,
317cabdff1aSopenharmony_ci                                const int16_t *chrFilter, const int16_t **chrUSrc,
318cabdff1aSopenharmony_ci                                const int16_t **chrVSrc,
319cabdff1aSopenharmony_ci                                int chrFilterSize, const int16_t **alpSrc,
320cabdff1aSopenharmony_ci                                uint8_t *dest, int dstW, int dstY)
321cabdff1aSopenharmony_ci{
    /* Locals named by YSCALEYUV2PACKEDX_END as asm operands. */
322cabdff1aSopenharmony_ci    x86_reg dummy=0;
323cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
324cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
325cabdff1aSopenharmony_ci
326cabdff1aSopenharmony_ci    if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
327cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX
328cabdff1aSopenharmony_ci        YSCALEYUV2RGBX
        /* The alpha filter is given mm0, mm3, mm6, mm1 and mm7 only, so
         * the colour bytes in mm2/mm4/mm5 survive without being spilled. */
329cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
        /* Shift the filtered alpha words right by 3 and pack them to
         * 8 unsigned bytes in mm1. */
330cabdff1aSopenharmony_ci        "psraw                        $3, %%mm1         \n\t"
331cabdff1aSopenharmony_ci        "psraw                        $3, %%mm7         \n\t"
332cabdff1aSopenharmony_ci        "packuswb                  %%mm7, %%mm1         \n\t"
333cabdff1aSopenharmony_ci        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
334cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_END
335cabdff1aSopenharmony_ci    } else {
336cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX
337cabdff1aSopenharmony_ci        YSCALEYUV2RGBX
        /* No alpha input: set every bit of mm7 so each pixel gets an
         * alpha byte of 0xFF. */
338cabdff1aSopenharmony_ci        "pcmpeqd %%mm7, %%mm7 \n\t"
339cabdff1aSopenharmony_ci        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
340cabdff1aSopenharmony_ci        YSCALEYUV2PACKEDX_END
341cabdff1aSopenharmony_ci    }
342cabdff1aSopenharmony_ci}
343cabdff1aSopenharmony_ci
/*
 * REAL_WRITERGB16(dst, dstw, index): pack 8 pixels into 16-bit words, store
 * them and close the per-8-pixel loop.
 *
 * Register contract (fixed, not parameterised):
 *   in   mm2 = B, mm4 = G, mm5 = R as 8 packed bytes each; mm7 must be 0
 *   tmp  mm1, mm3 (and mm2, mm4, mm5 are modified)
 *
 * B and R are masked with the bF8 constant and G with bFC (presumably
 * per-byte 0xF8 / 0xFC masks - their definitions are outside this part of
 * the file). B is shifted right by 3 and byte-interleaved below R; G is
 * widened to words using the zero in mm7 and shifted left by 3; OR-ing the
 * two gives words with R in the top bits, G in the middle and B in the low
 * bits.
 *
 * Stores 16 bytes at dst + 2*index with MOVNTQ, adds 8 to index and jumps
 * back to label 1 while index is below dstw (`jb`, unsigned comparison).
 *
 * WRITERGB16 is the name to use: the extra macro level lets the
 * preprocessor expand the arguments before they are stringified.
 */
344cabdff1aSopenharmony_ci#define REAL_WRITERGB16(dst, dstw, index) \
345cabdff1aSopenharmony_ci    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
346cabdff1aSopenharmony_ci    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
347cabdff1aSopenharmony_ci    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
348cabdff1aSopenharmony_ci    "psrlq           $3, %%mm2  \n\t"\
349cabdff1aSopenharmony_ci\
350cabdff1aSopenharmony_ci    "movq         %%mm2, %%mm1  \n\t"\
351cabdff1aSopenharmony_ci    "movq         %%mm4, %%mm3  \n\t"\
352cabdff1aSopenharmony_ci\
353cabdff1aSopenharmony_ci    "punpcklbw    %%mm7, %%mm3  \n\t"\
354cabdff1aSopenharmony_ci    "punpcklbw    %%mm5, %%mm2  \n\t"\
355cabdff1aSopenharmony_ci    "punpckhbw    %%mm7, %%mm4  \n\t"\
356cabdff1aSopenharmony_ci    "punpckhbw    %%mm5, %%mm1  \n\t"\
357cabdff1aSopenharmony_ci\
358cabdff1aSopenharmony_ci    "psllq           $3, %%mm3  \n\t"\
359cabdff1aSopenharmony_ci    "psllq           $3, %%mm4  \n\t"\
360cabdff1aSopenharmony_ci\
361cabdff1aSopenharmony_ci    "por          %%mm3, %%mm2  \n\t"\
362cabdff1aSopenharmony_ci    "por          %%mm4, %%mm1  \n\t"\
363cabdff1aSopenharmony_ci\
364cabdff1aSopenharmony_ci    MOVNTQ(%%mm2,  (dst, index, 2))\
365cabdff1aSopenharmony_ci    MOVNTQ(%%mm1, 8(dst, index, 2))\
366cabdff1aSopenharmony_ci\
367cabdff1aSopenharmony_ci    "add             $8, "#index"   \n\t"\
368cabdff1aSopenharmony_ci    "cmp         "dstw", "#index"   \n\t"\
369cabdff1aSopenharmony_ci    " jb             1b             \n\t"
370cabdff1aSopenharmony_ci#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
371cabdff1aSopenharmony_ci
/**
 * Vertically filter one output row with the 32-bit-accumulating MMX path
 * and store it as 16-bit pixels via WRITERGB16. Eight pixels are produced
 * per loop iteration.
 *
 * Only c, dest and dstW are referenced in the body. The filter and source
 * parameters (including alpSrc) belong to the common signature, but the asm
 * reads its data through displacements from &c->redDither instead.
 *
 * @param c     scaler context; uv_offx2 is the byte offset added to a U
 *              pointer to reach V
 * @param dest  output row
 * @param dstW  output width in pixels, compared against the pixel index
 *              after each group of 8 has been written
 */
372cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
373cabdff1aSopenharmony_ci                                    const int16_t **lumSrc, int lumFilterSize,
374cabdff1aSopenharmony_ci                                    const int16_t *chrFilter, const int16_t **chrUSrc,
375cabdff1aSopenharmony_ci                                    const int16_t **chrVSrc,
376cabdff1aSopenharmony_ci                                    int chrFilterSize, const int16_t **alpSrc,
377cabdff1aSopenharmony_ci                                    uint8_t *dest, int dstW, int dstY)
378cabdff1aSopenharmony_ci{
    /* Locals named by YSCALEYUV2PACKEDX_END as asm operands. */
379cabdff1aSopenharmony_ci    x86_reg dummy=0;
380cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
381cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
382cabdff1aSopenharmony_ci
383cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_ACCURATE
384cabdff1aSopenharmony_ci    YSCALEYUV2RGBX
    /* WRITERGB16 widens G with mm7 and therefore needs it to be zero. */
385cabdff1aSopenharmony_ci    "pxor %%mm7, %%mm7 \n\t"
386cabdff1aSopenharmony_ci    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
    /* Optional dithering: add the per-channel dither bytes with unsigned
     * saturation before WRITERGB16 masks off the low bits. */
387cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
388cabdff1aSopenharmony_ci    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
389cabdff1aSopenharmony_ci    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
390cabdff1aSopenharmony_ci    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
391cabdff1aSopenharmony_ci#endif
392cabdff1aSopenharmony_ci    WRITERGB16(%4, "%5", %%FF_REGa)
393cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_END
394cabdff1aSopenharmony_ci}
395cabdff1aSopenharmony_ci
/**
 * Vertically filter one output row with the pmulhw-based (16-bit
 * accumulating) MMX path and store it as 16-bit pixels via WRITERGB16.
 * Eight pixels are produced per loop iteration.
 *
 * Only c, dest and dstW are referenced in the body. The filter and source
 * parameters (including alpSrc) belong to the common signature, but the asm
 * reads its data through displacements from &c->redDither instead.
 *
 * @param c     scaler context; uv_offx2 is the byte offset added to a U
 *              pointer to reach V
 * @param dest  output row
 * @param dstW  output width in pixels, compared against the pixel index
 *              after each group of 8 has been written
 */
396cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
397cabdff1aSopenharmony_ci                                 const int16_t **lumSrc, int lumFilterSize,
398cabdff1aSopenharmony_ci                                 const int16_t *chrFilter, const int16_t **chrUSrc,
399cabdff1aSopenharmony_ci                                 const int16_t **chrVSrc,
400cabdff1aSopenharmony_ci                                 int chrFilterSize, const int16_t **alpSrc,
401cabdff1aSopenharmony_ci                                 uint8_t *dest, int dstW, int dstY)
402cabdff1aSopenharmony_ci{
    /* Locals named by YSCALEYUV2PACKEDX_END as asm operands. */
403cabdff1aSopenharmony_ci    x86_reg dummy=0;
404cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
405cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
406cabdff1aSopenharmony_ci
407cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX
408cabdff1aSopenharmony_ci    YSCALEYUV2RGBX
    /* WRITERGB16 widens G with mm7 and therefore needs it to be zero. */
409cabdff1aSopenharmony_ci    "pxor %%mm7, %%mm7 \n\t"
410cabdff1aSopenharmony_ci    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
    /* Optional dithering: add the per-channel dither bytes with unsigned
     * saturation before WRITERGB16 masks off the low bits. */
411cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
412cabdff1aSopenharmony_ci    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
413cabdff1aSopenharmony_ci    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
414cabdff1aSopenharmony_ci    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
415cabdff1aSopenharmony_ci#endif
416cabdff1aSopenharmony_ci    WRITERGB16(%4, "%5", %%FF_REGa)
417cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_END
418cabdff1aSopenharmony_ci}
419cabdff1aSopenharmony_ci
/*
 * REAL_WRITERGB15(dst, dstw, index): pack 8 pixels into 16-bit words with
 * five bits per component, store them and close the per-8-pixel loop.
 *
 * Register contract (fixed, not parameterised):
 *   in   mm2 = B, mm4 = G, mm5 = R as 8 packed bytes each; mm7 must be 0
 *   tmp  mm1, mm3 (and mm2, mm4, mm5 are modified)
 *
 * All three components are masked with the bF8 constant (presumably a
 * per-byte 0xF8 mask - its definition is outside this part of the file).
 * Compared with REAL_WRITERGB16, R is additionally shifted right by 1 and
 * G is shifted left by 2 instead of 3, which leaves the top bit of every
 * output word clear.
 *
 * Stores 16 bytes at dst + 2*index with MOVNTQ, adds 8 to index and jumps
 * back to label 1 while index is below dstw (`jb`, unsigned comparison).
 *
 * WRITERGB15 is the name to use: the extra macro level lets the
 * preprocessor expand the arguments before they are stringified.
 */
420cabdff1aSopenharmony_ci#define REAL_WRITERGB15(dst, dstw, index) \
421cabdff1aSopenharmony_ci    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
422cabdff1aSopenharmony_ci    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
423cabdff1aSopenharmony_ci    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
424cabdff1aSopenharmony_ci    "psrlq           $3, %%mm2  \n\t"\
425cabdff1aSopenharmony_ci    "psrlq           $1, %%mm5  \n\t"\
426cabdff1aSopenharmony_ci\
427cabdff1aSopenharmony_ci    "movq         %%mm2, %%mm1  \n\t"\
428cabdff1aSopenharmony_ci    "movq         %%mm4, %%mm3  \n\t"\
429cabdff1aSopenharmony_ci\
430cabdff1aSopenharmony_ci    "punpcklbw    %%mm7, %%mm3  \n\t"\
431cabdff1aSopenharmony_ci    "punpcklbw    %%mm5, %%mm2  \n\t"\
432cabdff1aSopenharmony_ci    "punpckhbw    %%mm7, %%mm4  \n\t"\
433cabdff1aSopenharmony_ci    "punpckhbw    %%mm5, %%mm1  \n\t"\
434cabdff1aSopenharmony_ci\
435cabdff1aSopenharmony_ci    "psllq           $2, %%mm3  \n\t"\
436cabdff1aSopenharmony_ci    "psllq           $2, %%mm4  \n\t"\
437cabdff1aSopenharmony_ci\
438cabdff1aSopenharmony_ci    "por          %%mm3, %%mm2  \n\t"\
439cabdff1aSopenharmony_ci    "por          %%mm4, %%mm1  \n\t"\
440cabdff1aSopenharmony_ci\
441cabdff1aSopenharmony_ci    MOVNTQ(%%mm2,  (dst, index, 2))\
442cabdff1aSopenharmony_ci    MOVNTQ(%%mm1, 8(dst, index, 2))\
443cabdff1aSopenharmony_ci\
444cabdff1aSopenharmony_ci    "add             $8, "#index"   \n\t"\
445cabdff1aSopenharmony_ci    "cmp         "dstw", "#index"   \n\t"\
446cabdff1aSopenharmony_ci    " jb             1b             \n\t"
447cabdff1aSopenharmony_ci#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
448cabdff1aSopenharmony_ci
/**
 * RGB555 output stage built on the YSCALEYUV2PACKEDX_ACCURATE front end
 * ("_ar" variant of yuv2rgb555_X).
 *
 * The body is a single inline-asm sequence assembled from macros.
 * YSCALEYUV2PACKEDX_ACCURATE, YSCALEYUV2RGBX and YSCALEYUV2PACKEDX_END are
 * defined outside this section; presumably the first opens the asm statement
 * and the last closes it and supplies the operand list (TODO confirm).
 * The visible code only relies on operand %0 (base for the *_DITHER offsets),
 * %4 (destination pointer), %5 (row width) and FF_REGa (loop index).
 *
 * None of the parameters, nor the locals dummy, dstW_reg and uv_off, are
 * named in the visible body; presumably they are consumed by the macros
 * (compare the explicit operand lists in yuv2bgr24_X).
 */
449cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
450cabdff1aSopenharmony_ci                                    const int16_t **lumSrc, int lumFilterSize,
451cabdff1aSopenharmony_ci                                    const int16_t *chrFilter, const int16_t **chrUSrc,
452cabdff1aSopenharmony_ci                                    const int16_t **chrVSrc,
453cabdff1aSopenharmony_ci                                    int chrFilterSize, const int16_t **alpSrc,
454cabdff1aSopenharmony_ci                                    uint8_t *dest, int dstW, int dstY)
455cabdff1aSopenharmony_ci{
456cabdff1aSopenharmony_ci    x86_reg dummy=0;
457cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
458cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
459cabdff1aSopenharmony_ci
460cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_ACCURATE
461cabdff1aSopenharmony_ci    YSCALEYUV2RGBX
    /* WRITERGB15 zero-extends the G bytes against mm7, so clear it first */
462cabdff1aSopenharmony_ci    "pxor %%mm7, %%mm7 \n\t"
463cabdff1aSopenharmony_ci    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
464cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
    /* add the per-channel dither values with unsigned byte saturation */
465cabdff1aSopenharmony_ci    "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
466cabdff1aSopenharmony_ci    "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
467cabdff1aSopenharmony_ci    "paddusb "RED_DITHER"(%0), %%mm5\n\t"
468cabdff1aSopenharmony_ci#endif
469cabdff1aSopenharmony_ci    WRITERGB15(%4, "%5", %%FF_REGa)
470cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_END
471cabdff1aSopenharmony_ci}
472cabdff1aSopenharmony_ci
/**
 * RGB555 output stage built on the YSCALEYUV2PACKEDX front end.
 *
 * Same structure as yuv2rgb555_X_ar, but uses YSCALEYUV2PACKEDX instead of
 * YSCALEYUV2PACKEDX_ACCURATE. Both macros, as well as YSCALEYUV2RGBX and
 * YSCALEYUV2PACKEDX_END, are defined outside this section; presumably they
 * open/close the asm statement and supply its operand list (TODO confirm).
 * The visible code only relies on operand %0 (base for the *_DITHER offsets),
 * %4 (destination pointer), %5 (row width) and FF_REGa (loop index).
 *
 * None of the parameters, nor the locals dummy, dstW_reg and uv_off, are
 * named in the visible body; presumably they are consumed by the macros.
 */
473cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
474cabdff1aSopenharmony_ci                                 const int16_t **lumSrc, int lumFilterSize,
475cabdff1aSopenharmony_ci                                 const int16_t *chrFilter, const int16_t **chrUSrc,
476cabdff1aSopenharmony_ci                                 const int16_t **chrVSrc,
477cabdff1aSopenharmony_ci                                 int chrFilterSize, const int16_t **alpSrc,
478cabdff1aSopenharmony_ci                                 uint8_t *dest, int dstW, int dstY)
479cabdff1aSopenharmony_ci{
480cabdff1aSopenharmony_ci    x86_reg dummy=0;
481cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
482cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
483cabdff1aSopenharmony_ci
484cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX
485cabdff1aSopenharmony_ci    YSCALEYUV2RGBX
    /* WRITERGB15 zero-extends the G bytes against mm7, so clear it first */
486cabdff1aSopenharmony_ci    "pxor %%mm7, %%mm7 \n\t"
487cabdff1aSopenharmony_ci    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
488cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
    /* add the per-channel dither values with unsigned byte saturation */
489cabdff1aSopenharmony_ci    "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
490cabdff1aSopenharmony_ci    "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
491cabdff1aSopenharmony_ci    "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
492cabdff1aSopenharmony_ci#endif
493cabdff1aSopenharmony_ci    WRITERGB15(%4, "%5", %%FF_REGa)
494cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_END
495cabdff1aSopenharmony_ci}
496cabdff1aSopenharmony_ci
/*
 * Store eight pixels as 24 bytes of packed 3-byte pixels using plain MMX
 * shifts and unpacks.
 *
 * On entry mm2/mm4/mm5 hold the B/G/R bytes of eight pixels and mm7 is zero.
 * The channels are first interleaved into four quadwords of two "0RGB"
 * dwords each, then the padding bytes are squeezed out with shift/OR steps
 * so that three quadwords remain, written with MOVNTQ to dst, dst+8, dst+16.
 * All of mm0-mm7 are overwritten in the process.
 *
 * Afterwards dst is advanced by 24 bytes and index by 8 pixels; control
 * jumps back to local label 1 (provided by the preceding code) while index
 * is below dstw (unsigned compare).
 */
497cabdff1aSopenharmony_ci#define WRITEBGR24MMX(dst, dstw, index) \
498cabdff1aSopenharmony_ci    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
499cabdff1aSopenharmony_ci    "movq      %%mm2, %%mm1     \n\t" /* B */\
500cabdff1aSopenharmony_ci    "movq      %%mm5, %%mm6     \n\t" /* R */\
501cabdff1aSopenharmony_ci    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
502cabdff1aSopenharmony_ci    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
503cabdff1aSopenharmony_ci    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
504cabdff1aSopenharmony_ci    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
505cabdff1aSopenharmony_ci    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
506cabdff1aSopenharmony_ci    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
507cabdff1aSopenharmony_ci    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
508cabdff1aSopenharmony_ci    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
509cabdff1aSopenharmony_ci    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
510cabdff1aSopenharmony_ci    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
511cabdff1aSopenharmony_ci\
512cabdff1aSopenharmony_ci    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
513cabdff1aSopenharmony_ci    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
514cabdff1aSopenharmony_ci    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
515cabdff1aSopenharmony_ci    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
516cabdff1aSopenharmony_ci\
517cabdff1aSopenharmony_ci    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
518cabdff1aSopenharmony_ci    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
519cabdff1aSopenharmony_ci    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
520cabdff1aSopenharmony_ci    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
521cabdff1aSopenharmony_ci\
522cabdff1aSopenharmony_ci    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
523cabdff1aSopenharmony_ci    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
524cabdff1aSopenharmony_ci    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
525cabdff1aSopenharmony_ci    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
526cabdff1aSopenharmony_ci\
527cabdff1aSopenharmony_ci    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
528cabdff1aSopenharmony_ci    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
529cabdff1aSopenharmony_ci    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
530cabdff1aSopenharmony_ci    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
531cabdff1aSopenharmony_ci    MOVNTQ(%%mm0, (dst))\
532cabdff1aSopenharmony_ci\
533cabdff1aSopenharmony_ci    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
534cabdff1aSopenharmony_ci    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
535cabdff1aSopenharmony_ci    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
536cabdff1aSopenharmony_ci    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
537cabdff1aSopenharmony_ci    MOVNTQ(%%mm6, 8(dst))\
538cabdff1aSopenharmony_ci\
539cabdff1aSopenharmony_ci    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
540cabdff1aSopenharmony_ci    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
541cabdff1aSopenharmony_ci    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
542cabdff1aSopenharmony_ci    MOVNTQ(%%mm5, 16(dst))\
543cabdff1aSopenharmony_ci\
544cabdff1aSopenharmony_ci    "add         $24, "#dst"    \n\t"\
545cabdff1aSopenharmony_ci\
546cabdff1aSopenharmony_ci    "add          $8, "#index"  \n\t"\
547cabdff1aSopenharmony_ci    "cmp      "dstw", "#index"  \n\t"\
548cabdff1aSopenharmony_ci    " jb          1b            \n\t"
549cabdff1aSopenharmony_ci
/*
 * Store eight pixels as 24 bytes of packed 3-byte pixels using pshufw and
 * byte masks.
 *
 * On entry mm2/mm4/mm5 hold the B/G/R bytes of eight pixels. mm0 and mm7 are
 * loaded with the masks ff_M24A and ff_M24C, and ff_M24B is applied from
 * memory; the three masks are defined outside this section. Each of the three
 * output quadwords is built the same way: pshufw replicates the source words
 * that contribute to it, pand keeps only the byte lanes belonging to that
 * channel, and por merges the channels. G is shifted right by one byte
 * after the first quadword so that its bytes line up for the remaining two.
 *
 * The quadwords are written with MOVNTQ to dst, dst+8 and dst+16. Afterwards
 * dst is advanced by 24 bytes and index by 8 pixels; control jumps back to
 * local label 1 (provided by the preceding code) while index is below dstw
 * (unsigned compare). mm0, mm1, mm3, mm4, mm6 and mm7 are overwritten.
 */
550cabdff1aSopenharmony_ci#define WRITEBGR24MMXEXT(dst, dstw, index) \
551cabdff1aSopenharmony_ci    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
552cabdff1aSopenharmony_ci    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
553cabdff1aSopenharmony_ci    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
554cabdff1aSopenharmony_ci    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
555cabdff1aSopenharmony_ci    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
556cabdff1aSopenharmony_ci    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
557cabdff1aSopenharmony_ci\
558cabdff1aSopenharmony_ci    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
559cabdff1aSopenharmony_ci    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
560cabdff1aSopenharmony_ci    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
561cabdff1aSopenharmony_ci\
562cabdff1aSopenharmony_ci    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
563cabdff1aSopenharmony_ci    "por    %%mm1, %%mm6        \n\t"\
564cabdff1aSopenharmony_ci    "por    %%mm3, %%mm6        \n\t"\
565cabdff1aSopenharmony_ci    MOVNTQ(%%mm6, (dst))\
566cabdff1aSopenharmony_ci\
567cabdff1aSopenharmony_ci    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
568cabdff1aSopenharmony_ci    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
569cabdff1aSopenharmony_ci    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
570cabdff1aSopenharmony_ci    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
571cabdff1aSopenharmony_ci\
572cabdff1aSopenharmony_ci    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
573cabdff1aSopenharmony_ci    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
574cabdff1aSopenharmony_ci    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
575cabdff1aSopenharmony_ci\
576cabdff1aSopenharmony_ci    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
577cabdff1aSopenharmony_ci    "por    %%mm3, %%mm6        \n\t"\
578cabdff1aSopenharmony_ci    MOVNTQ(%%mm6, 8(dst))\
579cabdff1aSopenharmony_ci\
580cabdff1aSopenharmony_ci    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
581cabdff1aSopenharmony_ci    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
582cabdff1aSopenharmony_ci    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
583cabdff1aSopenharmony_ci\
584cabdff1aSopenharmony_ci    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
585cabdff1aSopenharmony_ci    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
586cabdff1aSopenharmony_ci    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
587cabdff1aSopenharmony_ci\
588cabdff1aSopenharmony_ci    "por    %%mm1, %%mm3        \n\t"\
589cabdff1aSopenharmony_ci    "por    %%mm3, %%mm6        \n\t"\
590cabdff1aSopenharmony_ci    MOVNTQ(%%mm6, 16(dst))\
591cabdff1aSopenharmony_ci\
592cabdff1aSopenharmony_ci    "add      $24, "#dst"       \n\t"\
593cabdff1aSopenharmony_ci\
594cabdff1aSopenharmony_ci    "add       $8, "#index"     \n\t"\
595cabdff1aSopenharmony_ci    "cmp   "dstw", "#index"     \n\t"\
596cabdff1aSopenharmony_ci    " jb       1b               \n\t"
597cabdff1aSopenharmony_ci
/* Select the 24-bit writer used below: drop any earlier definition and
 * route WRITEBGR24 to the pshufw-based WRITEBGR24MMXEXT implementation. */
598cabdff1aSopenharmony_ci#undef WRITEBGR24
599cabdff1aSopenharmony_ci#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMXEXT(dst, dstw, index)
600cabdff1aSopenharmony_ci
601cabdff1aSopenharmony_ci#if HAVE_6REGS
/**
 * 24-bit packed output stage built on the YSCALEYUV2PACKEDX_ACCURATE front
 * end ("_ar" variant of yuv2bgr24_X). Only compiled when HAVE_6REGS is set.
 *
 * Unlike the 15/16-bit writers, the asm statement is closed explicitly here
 * instead of through YSCALEYUV2PACKEDX_END. Input operands:
 *   %0      &c->redDither, in a register
 *   %1..%3  dummy, in memory (placeholders; not referenced by the visible code)
 *   %4      dest, in a register
 *   %5      dstW_reg, in memory (loop bound for WRITEBGR24)
 *   %6      uv_off, in memory
 * plus the named constants ff_M24A, ff_M24C and ff_M24B.
 * REG_a, REG_c, REG_d and REG_S are declared clobbered.
 *
 * The filter/source parameters are not named in the visible body; presumably
 * the YSCALEYUV2PACKEDX_ACCURATE macro (defined outside this section) reaches
 * them through the context pointed to by %0 (TODO confirm).
 */
602cabdff1aSopenharmony_cistatic void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
603cabdff1aSopenharmony_ci                                   const int16_t **lumSrc, int lumFilterSize,
604cabdff1aSopenharmony_ci                                   const int16_t *chrFilter, const int16_t **chrUSrc,
605cabdff1aSopenharmony_ci                                   const int16_t **chrVSrc,
606cabdff1aSopenharmony_ci                                   int chrFilterSize, const int16_t **alpSrc,
607cabdff1aSopenharmony_ci                                   uint8_t *dest, int dstW, int dstY)
608cabdff1aSopenharmony_ci{
609cabdff1aSopenharmony_ci    x86_reg dummy=0;
610cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
611cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
612cabdff1aSopenharmony_ci
613cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_ACCURATE
614cabdff1aSopenharmony_ci    YSCALEYUV2RGBX
615cabdff1aSopenharmony_ci    "pxor %%mm7, %%mm7 \n\t"
    /* REG_c = dest + 3 * REG_a: output address for the pixel index held in REG_a */
616cabdff1aSopenharmony_ci    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c"\n\t" //FIXME optimize
617cabdff1aSopenharmony_ci    "add %4, %%"FF_REG_c"                        \n\t"
618cabdff1aSopenharmony_ci    WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
619cabdff1aSopenharmony_ci    :: "r" (&c->redDither),
620cabdff1aSopenharmony_ci       "m" (dummy), "m" (dummy), "m" (dummy),
621cabdff1aSopenharmony_ci       "r" (dest), "m" (dstW_reg), "m"(uv_off)
622cabdff1aSopenharmony_ci       NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
623cabdff1aSopenharmony_ci    : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
624cabdff1aSopenharmony_ci    );
625cabdff1aSopenharmony_ci}
626cabdff1aSopenharmony_ci
/**
 * 24-bit packed output stage built on the YSCALEYUV2PACKEDX front end.
 * Only compiled when HAVE_6REGS is set.
 *
 * Same structure as yuv2bgr24_X_ar, but uses YSCALEYUV2PACKEDX instead of
 * YSCALEYUV2PACKEDX_ACCURATE. Input operands:
 *   %0      &c->redDither, in a register
 *   %1..%3  dummy, in memory (placeholders; not referenced by the visible code)
 *   %4      dest, in a register
 *   %5      dstW_reg, in memory (loop bound for WRITEBGR24)
 *   %6      uv_off, in memory
 * plus the named constants ff_M24A, ff_M24C and ff_M24B.
 * REG_a, REG_c, REG_d and REG_S are declared clobbered.
 *
 * The filter/source parameters are not named in the visible body; presumably
 * the YSCALEYUV2PACKEDX macro (defined outside this section) reaches them
 * through the context pointed to by %0 (TODO confirm).
 */
627cabdff1aSopenharmony_cistatic void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
628cabdff1aSopenharmony_ci                                const int16_t **lumSrc, int lumFilterSize,
629cabdff1aSopenharmony_ci                                const int16_t *chrFilter, const int16_t **chrUSrc,
630cabdff1aSopenharmony_ci                                const int16_t **chrVSrc,
631cabdff1aSopenharmony_ci                                int chrFilterSize, const int16_t **alpSrc,
632cabdff1aSopenharmony_ci                                uint8_t *dest, int dstW, int dstY)
633cabdff1aSopenharmony_ci{
634cabdff1aSopenharmony_ci    x86_reg dummy=0;
635cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
636cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
637cabdff1aSopenharmony_ci
638cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX
639cabdff1aSopenharmony_ci    YSCALEYUV2RGBX
640cabdff1aSopenharmony_ci    "pxor                    %%mm7, %%mm7              \n\t"
    /* REG_c = dest + 3 * REG_a: output address for the pixel index held in REG_a */
641cabdff1aSopenharmony_ci    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize
642cabdff1aSopenharmony_ci    "add                        %4, %%"FF_REG_c"       \n\t"
643cabdff1aSopenharmony_ci    WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
644cabdff1aSopenharmony_ci    :: "r" (&c->redDither),
645cabdff1aSopenharmony_ci       "m" (dummy), "m" (dummy), "m" (dummy),
646cabdff1aSopenharmony_ci       "r" (dest),  "m" (dstW_reg), "m"(uv_off)
647cabdff1aSopenharmony_ci       NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
648cabdff1aSopenharmony_ci    : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
649cabdff1aSopenharmony_ci    );
650cabdff1aSopenharmony_ci}
651cabdff1aSopenharmony_ci#endif /* HAVE_6REGS */
652cabdff1aSopenharmony_ci
/*
 * Pack four 16-bit word registers into 16 interleaved bytes and store them.
 *
 * mm3 and mm4 are each saturated to unsigned bytes and byte-interleaved
 * (mm3, mm4, mm3, mm4, ...); mm1 and mm7 are saturated into one register of
 * eight bytes. That register is then interleaved with the mm3/mm4 stream, so
 * the stored byte order is mm1[0], mm3[0], mm1[1], mm4[0], ... -- i.e.
 * Y U Y V if mm1/mm7 carry luma and mm3/mm4 carry U/V, as the YUY2 name
 * suggests (the producers of these registers are outside this section).
 *
 * The two quadwords are written with MOVNTQ to dst + index*2 and
 * dst + index*2 + 8. index is then advanced by 8 and control jumps back to
 * local label 1 (provided by the preceding code) while index is below dstw
 * (unsigned compare).
 *
 * WRITEYUY2 is the wrapper that lets macro arguments expand before they are
 * stringified by REAL_WRITEYUY2.
 */
653cabdff1aSopenharmony_ci#define REAL_WRITEYUY2(dst, dstw, index) \
654cabdff1aSopenharmony_ci    "packuswb  %%mm3, %%mm3     \n\t"\
655cabdff1aSopenharmony_ci    "packuswb  %%mm4, %%mm4     \n\t"\
656cabdff1aSopenharmony_ci    "packuswb  %%mm7, %%mm1     \n\t"\
657cabdff1aSopenharmony_ci    "punpcklbw %%mm4, %%mm3     \n\t"\
658cabdff1aSopenharmony_ci    "movq      %%mm1, %%mm7     \n\t"\
659cabdff1aSopenharmony_ci    "punpcklbw %%mm3, %%mm1     \n\t"\
660cabdff1aSopenharmony_ci    "punpckhbw %%mm3, %%mm7     \n\t"\
661cabdff1aSopenharmony_ci\
662cabdff1aSopenharmony_ci    MOVNTQ(%%mm1, (dst, index, 2))\
663cabdff1aSopenharmony_ci    MOVNTQ(%%mm7, 8(dst, index, 2))\
664cabdff1aSopenharmony_ci\
665cabdff1aSopenharmony_ci    "add          $8, "#index"  \n\t"\
666cabdff1aSopenharmony_ci    "cmp      "dstw", "#index"  \n\t"\
667cabdff1aSopenharmony_ci    " jb          1b            \n\t"
668cabdff1aSopenharmony_ci#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
669cabdff1aSopenharmony_ci
/**
 * YUYV422 output stage built on the YSCALEYUV2PACKEDX_ACCURATE front end
 * ("_ar" variant of yuv2yuyv422_X).
 *
 * No colour conversion is done here: the four word accumulators left by the
 * front end (mm3, mm4, mm1, mm7) are shifted right arithmetically by 3 and
 * handed to WRITEYUY2, which saturates, interleaves and stores them.
 *
 * YSCALEYUV2PACKEDX_ACCURATE and YSCALEYUV2PACKEDX_END are defined outside
 * this section; presumably they open/close the asm statement and supply the
 * operand list, including %4 (destination) and %5 (row width) used below
 * (TODO confirm). None of the parameters, nor the locals dummy, dstW_reg and
 * uv_off, are named in the visible body.
 */
670cabdff1aSopenharmony_cistatic void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
671cabdff1aSopenharmony_ci                                     const int16_t **lumSrc, int lumFilterSize,
672cabdff1aSopenharmony_ci                                     const int16_t *chrFilter, const int16_t **chrUSrc,
673cabdff1aSopenharmony_ci                                     const int16_t **chrVSrc,
674cabdff1aSopenharmony_ci                                     int chrFilterSize, const int16_t **alpSrc,
675cabdff1aSopenharmony_ci                                     uint8_t *dest, int dstW, int dstY)
676cabdff1aSopenharmony_ci{
677cabdff1aSopenharmony_ci    x86_reg dummy=0;
678cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
679cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
680cabdff1aSopenharmony_ci
681cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_ACCURATE
682cabdff1aSopenharmony_ci    /* scale the four word accumulators (mm3, mm4, mm1, mm7) down by 3 bits before packing */
683cabdff1aSopenharmony_ci    "psraw $3, %%mm3    \n\t"
684cabdff1aSopenharmony_ci    "psraw $3, %%mm4    \n\t"
685cabdff1aSopenharmony_ci    "psraw $3, %%mm1    \n\t"
686cabdff1aSopenharmony_ci    "psraw $3, %%mm7    \n\t"
687cabdff1aSopenharmony_ci    WRITEYUY2(%4, "%5", %%FF_REGa)
688cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_END
689cabdff1aSopenharmony_ci}
690cabdff1aSopenharmony_ci
/**
 * YUYV422 output stage built on the YSCALEYUV2PACKEDX front end.
 *
 * Same structure as yuv2yuyv422_X_ar, but uses YSCALEYUV2PACKEDX instead of
 * YSCALEYUV2PACKEDX_ACCURATE. No colour conversion is done here: the four
 * word accumulators left by the front end (mm3, mm4, mm1, mm7) are shifted
 * right arithmetically by 3 and handed to WRITEYUY2, which saturates,
 * interleaves and stores them.
 *
 * YSCALEYUV2PACKEDX and YSCALEYUV2PACKEDX_END are defined outside this
 * section; presumably they open/close the asm statement and supply the
 * operand list, including %4 (destination) and %5 (row width) used below
 * (TODO confirm). None of the parameters, nor the locals dummy, dstW_reg and
 * uv_off, are named in the visible body.
 */
691cabdff1aSopenharmony_cistatic void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
692cabdff1aSopenharmony_ci                                  const int16_t **lumSrc, int lumFilterSize,
693cabdff1aSopenharmony_ci                                  const int16_t *chrFilter, const int16_t **chrUSrc,
694cabdff1aSopenharmony_ci                                  const int16_t **chrVSrc,
695cabdff1aSopenharmony_ci                                  int chrFilterSize, const int16_t **alpSrc,
696cabdff1aSopenharmony_ci                                  uint8_t *dest, int dstW, int dstY)
697cabdff1aSopenharmony_ci{
698cabdff1aSopenharmony_ci    x86_reg dummy=0;
699cabdff1aSopenharmony_ci    x86_reg dstW_reg = dstW;
700cabdff1aSopenharmony_ci    x86_reg uv_off = c->uv_offx2;
701cabdff1aSopenharmony_ci
702cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX
703cabdff1aSopenharmony_ci    /* scale the four word accumulators (mm3, mm4, mm1, mm7) down by 3 bits before packing */
704cabdff1aSopenharmony_ci    "psraw $3, %%mm3    \n\t"
705cabdff1aSopenharmony_ci    "psraw $3, %%mm4    \n\t"
706cabdff1aSopenharmony_ci    "psraw $3, %%mm1    \n\t"
707cabdff1aSopenharmony_ci    "psraw $3, %%mm7    \n\t"
708cabdff1aSopenharmony_ci    WRITEYUY2(%4, "%5", %%FF_REGa)
709cabdff1aSopenharmony_ci    YSCALEYUV2PACKEDX_END
710cabdff1aSopenharmony_ci}
711cabdff1aSopenharmony_ci
/*
 * Loop head and chroma half of the two-row RGB conversion.
 *
 * Zeroes the index register and defines local label 1, the loop target used
 * by the WRITE* macros. For the four chroma samples at byte offset index it
 * blends the rows at operands %2 and %3 as
 *     row3 >> 4  +  ((row2 - row3) * w) >> 16
 * with w read from CHR_MMX_FILTER_OFFSET+8 in the context c. The same is
 * done for the second chroma plane, reached by temporarily adding the
 * UV_OFF_BYTE value from c to index. U_OFFSET / V_OFFSET are subtracted, the
 * results are copied to mm2 / mm5, and mm3 / mm4 are multiplied (high word)
 * by UG_COEFF / VG_COEFF.
 *
 * On exit: mm2=(U-128)8, mm3=ug, mm4=vg, mm5=(V-128)8; mm0 is overwritten.
 * The "eax" / "+2048" wording in the per-line comments is historical: the
 * index register is the macro argument and the plane offset is UV_OFF_BYTE.
 */
712cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2RGB_UV(index, c) \
713cabdff1aSopenharmony_ci    "xor            "#index", "#index"  \n\t"\
714cabdff1aSopenharmony_ci    ".p2align              4            \n\t"\
715cabdff1aSopenharmony_ci    "1:                                 \n\t"\
716cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
717cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
718cabdff1aSopenharmony_ci    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
719cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
720cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
721cabdff1aSopenharmony_ci    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
722cabdff1aSopenharmony_ci    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
723cabdff1aSopenharmony_ci    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
724cabdff1aSopenharmony_ci    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
725cabdff1aSopenharmony_ci    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
726cabdff1aSopenharmony_ci    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
727cabdff1aSopenharmony_ci    "psraw                $4, %%mm3     \n\t" /* uvbuf1[eax] >>4*/\
728cabdff1aSopenharmony_ci    "psraw                $4, %%mm4     \n\t" /* uvbuf1[eax+2048] >>4*/\
729cabdff1aSopenharmony_ci    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf1[eax]>>4 + (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
730cabdff1aSopenharmony_ci    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf1[eax+2048]>>4 + (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
731cabdff1aSopenharmony_ci    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
732cabdff1aSopenharmony_ci    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
733cabdff1aSopenharmony_ci    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
734cabdff1aSopenharmony_ci    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
735cabdff1aSopenharmony_ci    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
736cabdff1aSopenharmony_ci    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
737cabdff1aSopenharmony_ci    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
738cabdff1aSopenharmony_ci
/*
 * Blend eight 16-bit samples from two rows (used for luma and for alpha).
 *
 * Reads 16 bytes from each of b1 and b2 at byte offset index*2 and computes,
 * per word,
 *     b2 >> 4  +  ((b1 - b2) * w) >> 16
 * with w read from LUM_MMX_FILTER_OFFSET+8 in the context c.
 * On exit mm1 holds the first four results and mm7 the next four;
 * mm0 and mm6 are overwritten.
 */
739cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
740cabdff1aSopenharmony_ci    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
741cabdff1aSopenharmony_ci    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
742cabdff1aSopenharmony_ci    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax], next 8 bytes*/\
743cabdff1aSopenharmony_ci    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax], next 8 bytes*/\
744cabdff1aSopenharmony_ci    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
745cabdff1aSopenharmony_ci    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
746cabdff1aSopenharmony_ci    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
747cabdff1aSopenharmony_ci    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
748cabdff1aSopenharmony_ci    "psraw                $4, %%mm1     \n\t" /* buf1[eax] >>4*/\
749cabdff1aSopenharmony_ci    "psraw                $4, %%mm7     \n\t" /* buf1[eax] >>4*/\
750cabdff1aSopenharmony_ci    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
751cabdff1aSopenharmony_ci    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
752cabdff1aSopenharmony_ci
/*
 * Final colour-matrix step: combine blended luma and chroma into B/G/R bytes.
 *
 * Expects mm1/mm7 = first/second group of four luma words, mm2/mm5 = the U/V
 * differences and mm3/mm4 = the ug/vg products. U and V are multiplied (high
 * word) by UB_COEFF / VR_COEFF, luma has Y_OFFSET subtracted and is multiplied
 * by Y_COEFF, and ug+vg is summed into the green term. Every chroma word is
 * then duplicated (punpck?wd of a register with itself) so one chroma sample
 * serves two adjacent luma samples, the luma is added, and the words are
 * saturated to unsigned bytes.
 *
 * On exit mm2=B, mm4=G, mm5=R for eight pixels; mm0, mm3 and mm6 are
 * overwritten and mm1/mm7 hold the scaled luma.
 */
753cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2RGB_COEFF(c) \
754cabdff1aSopenharmony_ci    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
755cabdff1aSopenharmony_ci    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
756cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
757cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
758cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
759cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
760cabdff1aSopenharmony_ci    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
761cabdff1aSopenharmony_ci    "paddw             %%mm3, %%mm4     \n\t"\
762cabdff1aSopenharmony_ci    "movq              %%mm2, %%mm0     \n\t"\
763cabdff1aSopenharmony_ci    "movq              %%mm5, %%mm6     \n\t"\
764cabdff1aSopenharmony_ci    "movq              %%mm4, %%mm3     \n\t"\
765cabdff1aSopenharmony_ci    "punpcklwd         %%mm2, %%mm2     \n\t"\
766cabdff1aSopenharmony_ci    "punpcklwd         %%mm5, %%mm5     \n\t"\
767cabdff1aSopenharmony_ci    "punpcklwd         %%mm4, %%mm4     \n\t"\
768cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm2     \n\t"\
769cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm5     \n\t"\
770cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm4     \n\t"\
771cabdff1aSopenharmony_ci    "punpckhwd         %%mm0, %%mm0     \n\t"\
772cabdff1aSopenharmony_ci    "punpckhwd         %%mm6, %%mm6     \n\t"\
773cabdff1aSopenharmony_ci    "punpckhwd         %%mm3, %%mm3     \n\t"\
774cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm0     \n\t"\
775cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm6     \n\t"\
776cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm3     \n\t"\
777cabdff1aSopenharmony_ci    /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
778cabdff1aSopenharmony_ci    "packuswb          %%mm0, %%mm2     \n\t"\
779cabdff1aSopenharmony_ci    "packuswb          %%mm6, %%mm5     \n\t"\
780cabdff1aSopenharmony_ci    "packuswb          %%mm3, %%mm4     \n\t"\
781cabdff1aSopenharmony_ci
/* Wrapper that lets its arguments macro-expand before REAL_YSCALEYUV2RGB_YA
 * stringifies them. */
782cabdff1aSopenharmony_ci#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
783cabdff1aSopenharmony_ci
/* Complete two-row YUV -> RGB step: chroma blend (which also opens the loop
 * at label 1), luma blend of the rows in asm operands %0 and %1, then the
 * colour-matrix step. Leaves mm2=B, mm4=G, mm5=R for eight pixels. */
784cabdff1aSopenharmony_ci#define YSCALEYUV2RGB(index, c) \
785cabdff1aSopenharmony_ci    REAL_YSCALEYUV2RGB_UV(index, c) \
786cabdff1aSopenharmony_ci    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
787cabdff1aSopenharmony_ci    REAL_YSCALEYUV2RGB_COEFF(c)
788cabdff1aSopenharmony_ci
789cabdff1aSopenharmony_ci/**
790cabdff1aSopenharmony_ci * vertical bilinear scale YV12 to RGB
 *
 * Blends two luma rows (buf[0], buf[1]) and two chroma rows (ubuf[0],
 * ubuf[1]) and writes 32-bit pixels to dest through WRITEBGR32.
 *
 * The asm operands are the same in every path: %0=buf0 (REG_c),
 * %1=buf1 (REG_d), %2=ubuf0 (REG_S), %3=ubuf1 (REG_D), %4=dest,
 * %5=&c->redDither (REG_a). The loop bound is read from DSTW_OFFSET(%5)
 * and the blend weights from LUM_MMX_FILTER_OFFSET+8 and
 * CHR_MMX_FILTER_OFFSET+8 relative to %5; vbuf, dstW, yalpha, uvalpha and
 * y are not referenced in the body, so presumably the caller has stored
 * the needed values in the context beforehand (TODO confirm).
 *
 * With CONFIG_SWSCALE_ALPHA and c->needAlpha the two alpha rows abuf[0] and
 * abuf[1] are blended with the same macro as luma and passed to WRITEBGR32;
 * otherwise an all-ones register is passed in that position.
791cabdff1aSopenharmony_ci */
792cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
793cabdff1aSopenharmony_ci                                const int16_t *ubuf[2], const int16_t *vbuf[2],
794cabdff1aSopenharmony_ci                                const int16_t *abuf[2], uint8_t *dest,
795cabdff1aSopenharmony_ci                                int dstW, int yalpha, int uvalpha, int y)
796cabdff1aSopenharmony_ci{
797cabdff1aSopenharmony_ci    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
798cabdff1aSopenharmony_ci                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
799cabdff1aSopenharmony_ci
800cabdff1aSopenharmony_ci    if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
801cabdff1aSopenharmony_ci        const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
802cabdff1aSopenharmony_ci#if ARCH_X86_64
        /* 64-bit: r8 serves as the loop index (declared clobbered) and the
         * alpha rows are passed as the extra register operands %6 and %7. */
803cabdff1aSopenharmony_ci        __asm__ volatile(
804cabdff1aSopenharmony_ci            YSCALEYUV2RGB(%%r8, %5)
805cabdff1aSopenharmony_ci            YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
806cabdff1aSopenharmony_ci            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
807cabdff1aSopenharmony_ci            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
808cabdff1aSopenharmony_ci            "packuswb            %%mm7, %%mm1       \n\t"
809cabdff1aSopenharmony_ci            WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
810cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
811cabdff1aSopenharmony_ci               "a" (&c->redDither),
812cabdff1aSopenharmony_ci               "r" (abuf0), "r" (abuf1)
813cabdff1aSopenharmony_ci            : "%r8"
814cabdff1aSopenharmony_ci        );
815cabdff1aSopenharmony_ci#else
        /* Non-64-bit: the alpha row pointers are parked in the context
         * (u_temp / v_temp) and swapped into %0 / %1 around the alpha blend,
         * with the luma pointers kept on the stack meanwhile. REG_b is saved
         * to the ESP_OFFSET slot so it can hold dest, and REG_BP is pushed
         * so it can serve as the loop index. */
816cabdff1aSopenharmony_ci        c->u_temp=(intptr_t)abuf0;
817cabdff1aSopenharmony_ci        c->v_temp=(intptr_t)abuf1;
818cabdff1aSopenharmony_ci        __asm__ volatile(
819cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
820cabdff1aSopenharmony_ci            "mov        %4, %%"FF_REG_b"            \n\t"
821cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
822cabdff1aSopenharmony_ci            YSCALEYUV2RGB(%%FF_REGBP, %5)
823cabdff1aSopenharmony_ci            "push                   %0              \n\t"
824cabdff1aSopenharmony_ci            "push                   %1              \n\t"
825cabdff1aSopenharmony_ci            "mov          "U_TEMP"(%5), %0          \n\t"
826cabdff1aSopenharmony_ci            "mov          "V_TEMP"(%5), %1          \n\t"
827cabdff1aSopenharmony_ci            YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
828cabdff1aSopenharmony_ci            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
829cabdff1aSopenharmony_ci            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
830cabdff1aSopenharmony_ci            "packuswb            %%mm7, %%mm1       \n\t"
831cabdff1aSopenharmony_ci            "pop                    %1              \n\t"
832cabdff1aSopenharmony_ci            "pop                    %0              \n\t"
833cabdff1aSopenharmony_ci            WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
834cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
835cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
836cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
837cabdff1aSopenharmony_ci               "a" (&c->redDither)
838cabdff1aSopenharmony_ci        );
839cabdff1aSopenharmony_ci#endif
840cabdff1aSopenharmony_ci    } else {
        /* No alpha plane: mm7 is set to all ones (pcmpeqd of a register with
         * itself) and passed in the position where the alpha paths pass the
         * blended alpha bytes. REG_b / REG_BP are saved and reused as in the
         * non-64-bit alpha path. */
841cabdff1aSopenharmony_ci        __asm__ volatile(
842cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
843cabdff1aSopenharmony_ci            "mov        %4, %%"FF_REG_b"            \n\t"
844cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
845cabdff1aSopenharmony_ci            YSCALEYUV2RGB(%%FF_REGBP, %5)
846cabdff1aSopenharmony_ci            "pcmpeqd %%mm7, %%mm7                   \n\t"
847cabdff1aSopenharmony_ci            WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
848cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
849cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
850cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
851cabdff1aSopenharmony_ci               "a" (&c->redDither)
852cabdff1aSopenharmony_ci        );
853cabdff1aSopenharmony_ci    }
854cabdff1aSopenharmony_ci}
855cabdff1aSopenharmony_ci
/**
 * Two-row blend of YUV input to 24-bit packed output.
 *
 * Blends the luma rows buf[0]/buf[1] and chroma rows ubuf[0]/ubuf[1] with
 * YSCALEYUV2RGB and stores the result with WRITEBGR24.
 *
 * Asm operands: %0=buf0 (REG_c), %1=buf1 (REG_d), %2=ubuf0 (REG_S),
 * %3=ubuf1 (REG_D), %4=dest (memory), %5=&c->redDither (REG_a), plus the
 * named constants ff_M24A, ff_M24C and ff_M24B. REG_b is saved to the
 * ESP_OFFSET slot relative to %5 so it can hold dest, and REG_BP is pushed
 * so it can serve as the loop index; both are restored before the asm ends.
 * The loop bound is read from DSTW_OFFSET(%5).
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in the body;
 * presumably the caller has stored the needed values in the context
 * beforehand (TODO confirm).
 */
856cabdff1aSopenharmony_cistatic void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
857cabdff1aSopenharmony_ci                                const int16_t *ubuf[2], const int16_t *vbuf[2],
858cabdff1aSopenharmony_ci                                const int16_t *abuf[2], uint8_t *dest,
859cabdff1aSopenharmony_ci                                int dstW, int yalpha, int uvalpha, int y)
860cabdff1aSopenharmony_ci{
861cabdff1aSopenharmony_ci    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
862cabdff1aSopenharmony_ci                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
863cabdff1aSopenharmony_ci
864cabdff1aSopenharmony_ci    __asm__ volatile(
865cabdff1aSopenharmony_ci        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
866cabdff1aSopenharmony_ci        "mov           %4, %%"FF_REG_b"         \n\t"
867cabdff1aSopenharmony_ci        "push %%"FF_REG_BP"                     \n\t"
868cabdff1aSopenharmony_ci        YSCALEYUV2RGB(%%FF_REGBP, %5)
869cabdff1aSopenharmony_ci        "pxor    %%mm7, %%mm7                   \n\t"
870cabdff1aSopenharmony_ci        WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
871cabdff1aSopenharmony_ci        "pop %%"FF_REG_BP"                      \n\t"
872cabdff1aSopenharmony_ci        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
873cabdff1aSopenharmony_ci        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
874cabdff1aSopenharmony_ci           "a" (&c->redDither)
875cabdff1aSopenharmony_ci           NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
876cabdff1aSopenharmony_ci    );
877cabdff1aSopenharmony_ci}
878cabdff1aSopenharmony_ci
/**
 * Two-row blend of YUV input to 15-bit RGB output.
 *
 * Blends the luma rows buf[0]/buf[1] and chroma rows ubuf[0]/ubuf[1] with
 * YSCALEYUV2RGB, optionally adds the dither values (when DITHER1XBPP is
 * defined) and stores the result with WRITERGB15.
 *
 * Asm operands: %0=buf0 (REG_c), %1=buf1 (REG_d), %2=ubuf0 (REG_S),
 * %3=ubuf1 (REG_D), %4=dest (memory), %5=&c->redDither (REG_a), plus the
 * named constant bF8. REG_b is saved to the ESP_OFFSET slot relative to %5
 * so it can hold dest, and REG_BP is pushed so it can serve as the loop
 * index; both are restored before the asm ends. The loop bound is read from
 * DSTW_OFFSET(%5).
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in the body;
 * presumably the caller has stored the needed values in the context
 * beforehand (TODO confirm).
 */
879cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
880cabdff1aSopenharmony_ci                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
881cabdff1aSopenharmony_ci                                 const int16_t *abuf[2], uint8_t *dest,
882cabdff1aSopenharmony_ci                                 int dstW, int yalpha, int uvalpha, int y)
883cabdff1aSopenharmony_ci{
884cabdff1aSopenharmony_ci    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
885cabdff1aSopenharmony_ci                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
886cabdff1aSopenharmony_ci
887cabdff1aSopenharmony_ci    __asm__ volatile(
888cabdff1aSopenharmony_ci        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
889cabdff1aSopenharmony_ci        "mov        %4, %%"FF_REG_b"            \n\t"
890cabdff1aSopenharmony_ci        "push %%"FF_REG_BP"                     \n\t"
891cabdff1aSopenharmony_ci        YSCALEYUV2RGB(%%FF_REGBP, %5)
        /* WRITERGB15 zero-extends the G bytes against mm7, so clear it first */
892cabdff1aSopenharmony_ci        "pxor    %%mm7, %%mm7                   \n\t"
893cabdff1aSopenharmony_ci        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
894cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
        /* add the per-channel dither values with unsigned byte saturation */
895cabdff1aSopenharmony_ci        "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
896cabdff1aSopenharmony_ci        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
897cabdff1aSopenharmony_ci        "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
898cabdff1aSopenharmony_ci#endif
899cabdff1aSopenharmony_ci        WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
900cabdff1aSopenharmony_ci        "pop %%"FF_REG_BP"                      \n\t"
901cabdff1aSopenharmony_ci        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
902cabdff1aSopenharmony_ci        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
903cabdff1aSopenharmony_ci           "a" (&c->redDither)
904cabdff1aSopenharmony_ci           NAMED_CONSTRAINTS_ADD(bF8)
905cabdff1aSopenharmony_ci    );
906cabdff1aSopenharmony_ci}
907cabdff1aSopenharmony_ci
/**
 * Two-row output wrapper: hands two luma rows and two chroma rows to the
 * YSCALEYUV2RGB macro and writes the result with WRITERGB16.
 *
 * Both macros are defined outside this part of the file; this function only
 * binds the asm operands and preserves the b and BP registers around them.
 *
 * @param c       context; &c->redDither is passed in the a register (%5) and
 *                is the base for every *_OFFSET and *_DITHER access below
 * @param buf     luma rows; buf[0] and buf[1] become operands %0 and %1
 * @param ubuf    chroma rows; ubuf[0] and ubuf[1] become operands %2 and %3
 * @param vbuf    not referenced in this function body
 * @param abuf    not referenced in this function body
 * @param dest    output pointer; memory operand %4, loaded into the b register
 * @param dstW    not referenced directly; WRITERGB16 is given DSTW_OFFSET(%5)
 *                instead (presumably the width kept in the context - confirm)
 * @param yalpha  not referenced in this function body
 * @param uvalpha not referenced in this function body
 * @param y       not referenced in this function body
 */
908cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
909cabdff1aSopenharmony_ci                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
910cabdff1aSopenharmony_ci                                 const int16_t *abuf[2], uint8_t *dest,
911cabdff1aSopenharmony_ci                                 int dstW, int yalpha, int uvalpha, int y)
912cabdff1aSopenharmony_ci{
913cabdff1aSopenharmony_ci    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
914cabdff1aSopenharmony_ci                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
915cabdff1aSopenharmony_ci
916cabdff1aSopenharmony_ci    __asm__ volatile(
        /* store the b register at ESP_OFFSET(%5), then reuse it for dest */
917cabdff1aSopenharmony_ci        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
918cabdff1aSopenharmony_ci        "mov           %4, %%"FF_REG_b"         \n\t"
        /* BP serves as the index register of the macros, so it is pushed first */
919cabdff1aSopenharmony_ci        "push %%"FF_REG_BP"                     \n\t"
920cabdff1aSopenharmony_ci        YSCALEYUV2RGB(%%FF_REGBP, %5)
921cabdff1aSopenharmony_ci        "pxor    %%mm7, %%mm7                   \n\t"
922cabdff1aSopenharmony_ci        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
        /* optional dither: per-channel values added with unsigned saturation */
923cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
924cabdff1aSopenharmony_ci        "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
925cabdff1aSopenharmony_ci        "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
926cabdff1aSopenharmony_ci        "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
927cabdff1aSopenharmony_ci#endif
928cabdff1aSopenharmony_ci        WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
        /* undo the push and reload the b register saved on entry */
929cabdff1aSopenharmony_ci        "pop %%"FF_REG_BP"                      \n\t"
930cabdff1aSopenharmony_ci        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
        /* inputs only: no output operands and no clobber list are declared */
931cabdff1aSopenharmony_ci        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
932cabdff1aSopenharmony_ci           "a" (&c->redDither)
933cabdff1aSopenharmony_ci           NAMED_CONSTRAINTS_ADD(bF8,bFC)
934cabdff1aSopenharmony_ci    );
935cabdff1aSopenharmony_ci}
936cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2PACKED(index, c): loop head that blends two input rows into
 * luma and chroma words without any colour conversion.
 *
 * Operands expected from the enclosing asm statement (as bound by
 * yuv2yuyv422_2): %0/%1 = the two luma rows, %2/%3 = the two chroma rows,
 * c = base register for the *_OFFSET context accesses.
 *
 * Prologue (runs once): the words at CHR_MMX_FILTER_OFFSET+8(c) and
 * LUM_MMX_FILTER_OFFSET+8(c) are shifted right by 3 and stored back, so that
 * context memory is modified in place.
 *
 * Loop body (label 1; the backward jump is not part of this macro and is
 * presumably supplied by the writer macro that follows it):
 *   chroma: (row1 >> 7) + pmulhw(row0 - row1, chroma word), once at index and
 *           once at index + UV_OFF_BYTE(c)
 *   luma:   (row1 >> 7) + pmulhw(row0 - row1, luma word) for eight samples
 *
 * Results: mm1 = luma 0..3, mm7 = luma 4..7, mm3 = chroma read at index,
 *          mm4 = chroma read at index + UV_OFF_BYTE(c).
 * index is used unscaled for chroma addresses and scaled by 2 for luma.
 * In the per-line comments, eax denotes the index argument and +2048 the
 * UV_OFF_BYTE(c) displacement.
 */
937cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2PACKED(index, c) \
938cabdff1aSopenharmony_ci    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
939cabdff1aSopenharmony_ci    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
940cabdff1aSopenharmony_ci    "psraw                $3, %%mm0                           \n\t"\
941cabdff1aSopenharmony_ci    "psraw                $3, %%mm1                           \n\t"\
942cabdff1aSopenharmony_ci    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
943cabdff1aSopenharmony_ci    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
944cabdff1aSopenharmony_ci    "xor            "#index", "#index"                        \n\t"\
945cabdff1aSopenharmony_ci    ".p2align              4            \n\t"\
946cabdff1aSopenharmony_ci    "1:                                 \n\t"\
947cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
948cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
949cabdff1aSopenharmony_ci    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
950cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
951cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
952cabdff1aSopenharmony_ci    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
953cabdff1aSopenharmony_ci    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
954cabdff1aSopenharmony_ci    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
955cabdff1aSopenharmony_ci    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
956cabdff1aSopenharmony_ci    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
957cabdff1aSopenharmony_ci    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
958cabdff1aSopenharmony_ci    "psraw                $7, %%mm3     \n\t" /* uvbuf1[eax] >>7*/\
959cabdff1aSopenharmony_ci    "psraw                $7, %%mm4     \n\t" /* uvbuf1[eax+2048] >>7*/\
960cabdff1aSopenharmony_ci    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
961cabdff1aSopenharmony_ci    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
962cabdff1aSopenharmony_ci    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
963cabdff1aSopenharmony_ci    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
964cabdff1aSopenharmony_ci    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax+4]*/\
965cabdff1aSopenharmony_ci    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax+4]*/\
966cabdff1aSopenharmony_ci    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
967cabdff1aSopenharmony_ci    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax+4] - buf1[eax+4]*/\
968cabdff1aSopenharmony_ci    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
969cabdff1aSopenharmony_ci    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax+4] - buf1[eax+4])yalpha1>>16*/\
970cabdff1aSopenharmony_ci    "psraw                $7, %%mm1     \n\t" /* buf1[eax] >>7*/\
971cabdff1aSopenharmony_ci    "psraw                $7, %%mm7     \n\t" /* buf1[eax+4] >>7*/\
972cabdff1aSopenharmony_ci    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
973cabdff1aSopenharmony_ci    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax+4]yalpha1 + buf1[eax+4](1-yalpha1) >>16*/\
974cabdff1aSopenharmony_ci
975cabdff1aSopenharmony_ci#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
976cabdff1aSopenharmony_ci
/**
 * Two-row output wrapper: blends two luma rows and two chroma rows with
 * YSCALEYUV2PACKED and writes the result with WRITEYUY2 (defined outside this
 * part of the file).
 *
 * @param c       context; &c->redDither is passed in the a register (%5) and
 *                is the base for every *_OFFSET access made by the macros.
 *                YSCALEYUV2PACKED rewrites the words at
 *                CHR_MMX_FILTER_OFFSET+8 and LUM_MMX_FILTER_OFFSET+8 relative
 *                to it, so the context is modified by the call.
 * @param buf     luma rows; buf[0] and buf[1] become operands %0 and %1
 * @param ubuf    chroma rows; ubuf[0] and ubuf[1] become operands %2 and %3
 * @param vbuf    not referenced; the second chroma plane is read from the
 *                ubuf rows at a displacement of UV_OFF_BYTE(%5)
 * @param abuf    not referenced in this function body
 * @param dest    output pointer; memory operand %4, loaded into the b register
 * @param dstW    not referenced directly; WRITEYUY2 is given DSTW_OFFSET(%5)
 *                instead (presumably the width kept in the context - confirm)
 * @param yalpha  not referenced; the luma blend word is read from the context
 * @param uvalpha not referenced; the chroma blend word is read from the context
 * @param y       not referenced in this function body
 */
977cabdff1aSopenharmony_cistatic void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
978cabdff1aSopenharmony_ci                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
979cabdff1aSopenharmony_ci                                  const int16_t *abuf[2], uint8_t *dest,
980cabdff1aSopenharmony_ci                                  int dstW, int yalpha, int uvalpha, int y)
981cabdff1aSopenharmony_ci{
982cabdff1aSopenharmony_ci    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
983cabdff1aSopenharmony_ci                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
984cabdff1aSopenharmony_ci
985cabdff1aSopenharmony_ci    __asm__ volatile(
        /* store the b register at ESP_OFFSET(%5), then reuse it for dest */
986cabdff1aSopenharmony_ci        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
987cabdff1aSopenharmony_ci        "mov           %4, %%"FF_REG_b"         \n\t"
        /* BP serves as the index register of the macros, so it is pushed first */
988cabdff1aSopenharmony_ci        "push %%"FF_REG_BP"                     \n\t"
989cabdff1aSopenharmony_ci        YSCALEYUV2PACKED(%%FF_REGBP, %5)
990cabdff1aSopenharmony_ci        WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
        /* undo the push and reload the b register saved on entry */
991cabdff1aSopenharmony_ci        "pop %%"FF_REG_BP"                      \n\t"
992cabdff1aSopenharmony_ci        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
        /* inputs only: no output operands and no clobber list are declared */
993cabdff1aSopenharmony_ci        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
994cabdff1aSopenharmony_ci           "a" (&c->redDither)
995cabdff1aSopenharmony_ci    );
996cabdff1aSopenharmony_ci}
997cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2RGB1(index, c): loop head that converts eight pixels from a single
 * luma row (%0) and a single chroma row (%2) to packed 8-bit B, G and R.
 * Operands %1 and %3 are not read by this macro.
 *
 * Per iteration:
 *   - four chroma words are loaded from %2 at index and four more at
 *     index + UV_OFF_BYTE(c); both are shifted right by 4 and have
 *     U_OFFSET(c) / V_OFFSET(c) subtracted
 *   - eight luma words are loaded from %0, shifted right by 4, have
 *     Y_OFFSET(c) subtracted and are multiplied (pmulhw) by Y_COEFF(c)
 *   - the chroma terms are multiplied by UB/UG/VG/VR_COEFF(c), each word is
 *     duplicated to cover two adjacent pixels (punpcklwd / punpckhwd) and
 *     added to the luma
 *   - packuswb saturates to bytes: mm2 = B, mm4 = G, mm5 = R
 *
 * mm0, mm1, mm3, mm6 and mm7 are overwritten as scratch. The label 1 opened
 * here has no matching jump inside the macro; presumably the writer macro that
 * follows closes the loop. In the per-line comments, eax denotes the index
 * argument and +2048 the UV_OFF_BYTE(c) displacement.
 */
998cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2RGB1(index, c) \
999cabdff1aSopenharmony_ci    "xor            "#index", "#index"  \n\t"\
1000cabdff1aSopenharmony_ci    ".p2align              4            \n\t"\
1001cabdff1aSopenharmony_ci    "1:                                 \n\t"\
1002cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
1003cabdff1aSopenharmony_ci    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1004cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
1005cabdff1aSopenharmony_ci    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1006cabdff1aSopenharmony_ci    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] >>4*/\
1007cabdff1aSopenharmony_ci    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] >>4*/\
1008cabdff1aSopenharmony_ci    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
1009cabdff1aSopenharmony_ci    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
1010cabdff1aSopenharmony_ci    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
1011cabdff1aSopenharmony_ci    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
1012cabdff1aSopenharmony_ci    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
1013cabdff1aSopenharmony_ci    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
1014cabdff1aSopenharmony_ci    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1015cabdff1aSopenharmony_ci    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
1016cabdff1aSopenharmony_ci    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
1017cabdff1aSopenharmony_ci    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
1018cabdff1aSopenharmony_ci    "psraw                $4, %%mm7     \n\t" /* buf0[eax+4] >>4*/\
1019cabdff1aSopenharmony_ci    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
1020cabdff1aSopenharmony_ci    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
1021cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
1022cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
1023cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
1024cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
1025cabdff1aSopenharmony_ci    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1026cabdff1aSopenharmony_ci    "paddw             %%mm3, %%mm4     \n\t"\
1027cabdff1aSopenharmony_ci    "movq              %%mm2, %%mm0     \n\t"\
1028cabdff1aSopenharmony_ci    "movq              %%mm5, %%mm6     \n\t"\
1029cabdff1aSopenharmony_ci    "movq              %%mm4, %%mm3     \n\t"\
1030cabdff1aSopenharmony_ci    "punpcklwd         %%mm2, %%mm2     \n\t"\
1031cabdff1aSopenharmony_ci    "punpcklwd         %%mm5, %%mm5     \n\t"\
1032cabdff1aSopenharmony_ci    "punpcklwd         %%mm4, %%mm4     \n\t"\
1033cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm2     \n\t"\
1034cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm5     \n\t"\
1035cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm4     \n\t"\
1036cabdff1aSopenharmony_ci    "punpckhwd         %%mm0, %%mm0     \n\t"\
1037cabdff1aSopenharmony_ci    "punpckhwd         %%mm6, %%mm6     \n\t"\
1038cabdff1aSopenharmony_ci    "punpckhwd         %%mm3, %%mm3     \n\t"\
1039cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm0     \n\t"\
1040cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm6     \n\t"\
1041cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm3     \n\t"\
1042cabdff1aSopenharmony_ci    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = pixels 0..3, 2 = pixels 4..7) */\
1043cabdff1aSopenharmony_ci    "packuswb          %%mm0, %%mm2     \n\t"\
1044cabdff1aSopenharmony_ci    "packuswb          %%mm6, %%mm5     \n\t"\
1045cabdff1aSopenharmony_ci    "packuswb          %%mm3, %%mm4     \n\t"\
1046cabdff1aSopenharmony_ci
1047cabdff1aSopenharmony_ci#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
1048cabdff1aSopenharmony_ci
1049cabdff1aSopenharmony_ci// do vertical chrominance interpolation
/*
 * YSCALEYUV2RGB1b(index, c): same conversion as YSCALEYUV2RGB1, except that
 * chroma is taken from two rows: the words read from %2 and %3 are added and
 * the sum is shifted right logically by 5 (psrlw) before U_OFFSET(c) /
 * V_OFFSET(c) are subtracted. The 16-bit sum is what the FIXME on the psrlw
 * lines refers to.
 *
 * Luma still comes from the single row %0; operand %1 is not read.
 * Results after packuswb: mm2 = B, mm4 = G, mm5 = R for eight pixels.
 * mm0, mm1, mm3, mm6 and mm7 are overwritten as scratch. The label 1 opened
 * here has no matching jump inside the macro; presumably the writer macro that
 * follows closes the loop. In the per-line comments, eax denotes the index
 * argument and +2048 the UV_OFF_BYTE(c) displacement.
 */
1050cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2RGB1b(index, c) \
1051cabdff1aSopenharmony_ci    "xor            "#index", "#index"  \n\t"\
1052cabdff1aSopenharmony_ci    ".p2align              4            \n\t"\
1053cabdff1aSopenharmony_ci    "1:                                 \n\t"\
1054cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
1055cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
1056cabdff1aSopenharmony_ci    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1057cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
1058cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
1059cabdff1aSopenharmony_ci    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1060cabdff1aSopenharmony_ci    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1061cabdff1aSopenharmony_ci    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1062cabdff1aSopenharmony_ci    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
1063cabdff1aSopenharmony_ci    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
1064cabdff1aSopenharmony_ci    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
1065cabdff1aSopenharmony_ci    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
1066cabdff1aSopenharmony_ci    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
1067cabdff1aSopenharmony_ci    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
1068cabdff1aSopenharmony_ci    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
1069cabdff1aSopenharmony_ci    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
1070cabdff1aSopenharmony_ci    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1071cabdff1aSopenharmony_ci    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
1072cabdff1aSopenharmony_ci    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
1073cabdff1aSopenharmony_ci    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
1074cabdff1aSopenharmony_ci    "psraw                $4, %%mm7     \n\t" /* buf0[eax+4] >>4*/\
1075cabdff1aSopenharmony_ci    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
1076cabdff1aSopenharmony_ci    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
1077cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
1078cabdff1aSopenharmony_ci    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
1079cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
1080cabdff1aSopenharmony_ci    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
1081cabdff1aSopenharmony_ci    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1082cabdff1aSopenharmony_ci    "paddw             %%mm3, %%mm4     \n\t"\
1083cabdff1aSopenharmony_ci    "movq              %%mm2, %%mm0     \n\t"\
1084cabdff1aSopenharmony_ci    "movq              %%mm5, %%mm6     \n\t"\
1085cabdff1aSopenharmony_ci    "movq              %%mm4, %%mm3     \n\t"\
1086cabdff1aSopenharmony_ci    "punpcklwd         %%mm2, %%mm2     \n\t"\
1087cabdff1aSopenharmony_ci    "punpcklwd         %%mm5, %%mm5     \n\t"\
1088cabdff1aSopenharmony_ci    "punpcklwd         %%mm4, %%mm4     \n\t"\
1089cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm2     \n\t"\
1090cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm5     \n\t"\
1091cabdff1aSopenharmony_ci    "paddw             %%mm1, %%mm4     \n\t"\
1092cabdff1aSopenharmony_ci    "punpckhwd         %%mm0, %%mm0     \n\t"\
1093cabdff1aSopenharmony_ci    "punpckhwd         %%mm6, %%mm6     \n\t"\
1094cabdff1aSopenharmony_ci    "punpckhwd         %%mm3, %%mm3     \n\t"\
1095cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm0     \n\t"\
1096cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm6     \n\t"\
1097cabdff1aSopenharmony_ci    "paddw             %%mm7, %%mm3     \n\t"\
1098cabdff1aSopenharmony_ci    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = pixels 0..3, 2 = pixels 4..7) */\
1099cabdff1aSopenharmony_ci    "packuswb          %%mm0, %%mm2     \n\t"\
1100cabdff1aSopenharmony_ci    "packuswb          %%mm6, %%mm5     \n\t"\
1101cabdff1aSopenharmony_ci    "packuswb          %%mm3, %%mm4     \n\t"\
1102cabdff1aSopenharmony_ci
1103cabdff1aSopenharmony_ci#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
1104cabdff1aSopenharmony_ci
/*
 * YSCALEYUV2RGB1_ALPHA(index): loads eight 16-bit words from operand %1 at
 * index * 2, shifts each right arithmetically by 7 and packs them with
 * unsigned saturation into the eight bytes of mm7. mm1 is overwritten as
 * scratch. The callers in this file bind operand %1 to abuf0 when they use it.
 */
1105cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
1106cabdff1aSopenharmony_ci    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
1107cabdff1aSopenharmony_ci    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
1108cabdff1aSopenharmony_ci    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
1109cabdff1aSopenharmony_ci    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
1110cabdff1aSopenharmony_ci    "packuswb          %%mm1, %%mm7     \n\t"
1111cabdff1aSopenharmony_ci#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
1112cabdff1aSopenharmony_ci
1113cabdff1aSopenharmony_ci/**
1114cabdff1aSopenharmony_ci * YV12 to RGB without scaling or interpolating
 *
 * Single-luma-row variant: buf0 is the only luma input and the result is
 * written with WRITEBGR32 (defined outside this part of the file).
 *
 * Chroma handling depends on uvalpha:
 *   - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1)
 *   - otherwise:      ubuf[0] and ubuf[1] are summed and shifted right by 5
 *                     (YSCALEYUV2RGB1b)
 *
 * Alpha handling: when CONFIG_SWSCALE_ALPHA && c->needAlpha, abuf0 is bound to
 * operand %1 and YSCALEYUV2RGB1_ALPHA packs it into mm7; otherwise operand %1
 * is buf1 (an alias of buf0) and mm7 is set to all ones with pcmpeqd. In both
 * cases mm7 is the fourth colour register handed to WRITEBGR32.
 *
 * @param c       context; &c->redDither is passed in the a register (%5) and
 *                is the base for every *_OFFSET / *_COEFF access
 * @param buf0    luma row, operand %0
 * @param ubuf    chroma rows; ubuf[0] is operand %2, operand %3 is ubuf[0] or
 *                ubuf[1] depending on uvalpha
 * @param vbuf    not referenced; the second chroma plane is read from the
 *                ubuf rows at a displacement of UV_OFF_BYTE(%5)
 * @param abuf0   alpha row, only read on the needAlpha paths
 * @param dest    output pointer; memory operand %4, loaded into the b register
 * @param dstW    not referenced directly; WRITEBGR32 is given DSTW_OFFSET(%5)
 *                instead (presumably the width kept in the context - confirm)
 * @param uvalpha selects the chroma path as described above
 * @param y       not referenced in this function body
1115cabdff1aSopenharmony_ci */
1116cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
1117cabdff1aSopenharmony_ci                                const int16_t *ubuf[2], const int16_t *vbuf[2],
1118cabdff1aSopenharmony_ci                                const int16_t *abuf0, uint8_t *dest,
1119cabdff1aSopenharmony_ci                                int dstW, int uvalpha, int y)
1120cabdff1aSopenharmony_ci{
1121cabdff1aSopenharmony_ci    const int16_t *ubuf0 = ubuf[0];
1122cabdff1aSopenharmony_ci    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1123cabdff1aSopenharmony_ci
1124cabdff1aSopenharmony_ci    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1125cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[0];
1126cabdff1aSopenharmony_ci        if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
            /* single chroma row, alpha taken from abuf0 (operand %1) */
1127cabdff1aSopenharmony_ci            __asm__ volatile(
1128cabdff1aSopenharmony_ci                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1129cabdff1aSopenharmony_ci                "mov           %4, %%"FF_REG_b"         \n\t"
1130cabdff1aSopenharmony_ci                "push %%"FF_REG_BP"                     \n\t"
1131cabdff1aSopenharmony_ci                YSCALEYUV2RGB1(%%FF_REGBP, %5)
1132cabdff1aSopenharmony_ci                YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1133cabdff1aSopenharmony_ci                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1134cabdff1aSopenharmony_ci                "pop %%"FF_REG_BP"                      \n\t"
1135cabdff1aSopenharmony_ci                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1136cabdff1aSopenharmony_ci                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1137cabdff1aSopenharmony_ci                   "a" (&c->redDither)
1138cabdff1aSopenharmony_ci            );
1139cabdff1aSopenharmony_ci        } else {
            /* single chroma row, mm7 forced to all ones instead of reading alpha */
1140cabdff1aSopenharmony_ci            __asm__ volatile(
1141cabdff1aSopenharmony_ci                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1142cabdff1aSopenharmony_ci                "mov           %4, %%"FF_REG_b"         \n\t"
1143cabdff1aSopenharmony_ci                "push %%"FF_REG_BP"                     \n\t"
1144cabdff1aSopenharmony_ci                YSCALEYUV2RGB1(%%FF_REGBP, %5)
1145cabdff1aSopenharmony_ci                "pcmpeqd %%mm7, %%mm7                   \n\t"
1146cabdff1aSopenharmony_ci                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1147cabdff1aSopenharmony_ci                "pop %%"FF_REG_BP"                      \n\t"
1148cabdff1aSopenharmony_ci                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1149cabdff1aSopenharmony_ci                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1150cabdff1aSopenharmony_ci                   "a" (&c->redDither)
1151cabdff1aSopenharmony_ci            );
1152cabdff1aSopenharmony_ci        }
1153cabdff1aSopenharmony_ci    } else {
1154cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[1];
1155cabdff1aSopenharmony_ci        if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
            /* two chroma rows combined, alpha taken from abuf0 (operand %1) */
1156cabdff1aSopenharmony_ci            __asm__ volatile(
1157cabdff1aSopenharmony_ci                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1158cabdff1aSopenharmony_ci                "mov           %4, %%"FF_REG_b"         \n\t"
1159cabdff1aSopenharmony_ci                "push %%"FF_REG_BP"                     \n\t"
1160cabdff1aSopenharmony_ci                YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1161cabdff1aSopenharmony_ci                YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1162cabdff1aSopenharmony_ci                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1163cabdff1aSopenharmony_ci                "pop %%"FF_REG_BP"                      \n\t"
1164cabdff1aSopenharmony_ci                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1165cabdff1aSopenharmony_ci                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1166cabdff1aSopenharmony_ci                   "a" (&c->redDither)
1167cabdff1aSopenharmony_ci            );
1168cabdff1aSopenharmony_ci        } else {
            /* two chroma rows combined, mm7 forced to all ones */
1169cabdff1aSopenharmony_ci            __asm__ volatile(
1170cabdff1aSopenharmony_ci                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1171cabdff1aSopenharmony_ci                "mov           %4, %%"FF_REG_b"         \n\t"
1172cabdff1aSopenharmony_ci                "push %%"FF_REG_BP"                     \n\t"
1173cabdff1aSopenharmony_ci                YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1174cabdff1aSopenharmony_ci                "pcmpeqd %%mm7, %%mm7                   \n\t"
1175cabdff1aSopenharmony_ci                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1176cabdff1aSopenharmony_ci                "pop %%"FF_REG_BP"                      \n\t"
1177cabdff1aSopenharmony_ci                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1178cabdff1aSopenharmony_ci                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1179cabdff1aSopenharmony_ci                   "a" (&c->redDither)
1180cabdff1aSopenharmony_ci            );
1181cabdff1aSopenharmony_ci        }
1182cabdff1aSopenharmony_ci    }
1183cabdff1aSopenharmony_ci}
1184cabdff1aSopenharmony_ci
/**
 * Single-luma-row output wrapper: converts buf0 plus chroma to B, G, R bytes
 * with YSCALEYUV2RGB1 / YSCALEYUV2RGB1b and writes them with WRITEBGR24
 * (defined outside this part of the file).
 *
 * Chroma handling depends on uvalpha:
 *   - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1)
 *   - otherwise:      ubuf[0] and ubuf[1] are summed and shifted right by 5
 *                     (YSCALEYUV2RGB1b)
 *
 * @param c       context; &c->redDither is passed in the a register (%5) and
 *                is the base for every *_OFFSET / *_COEFF access
 * @param buf0    luma row, operand %0 (buf1 aliases it and fills operand %1)
 * @param ubuf    chroma rows; ubuf[0] is operand %2, operand %3 is ubuf[0] or
 *                ubuf[1] depending on uvalpha
 * @param vbuf    not referenced; the second chroma plane is read from the
 *                ubuf rows at a displacement of UV_OFF_BYTE(%5)
 * @param abuf0   not referenced in this function body
 * @param dest    output pointer; memory operand %4, loaded into the b register
 * @param dstW    not referenced directly; WRITEBGR24 is given DSTW_OFFSET(%5)
 *                instead (presumably the width kept in the context - confirm)
 * @param uvalpha selects the chroma path as described above
 * @param y       not referenced in this function body
 */
1185cabdff1aSopenharmony_cistatic void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
1186cabdff1aSopenharmony_ci                                const int16_t *ubuf[2], const int16_t *vbuf[2],
1187cabdff1aSopenharmony_ci                                const int16_t *abuf0, uint8_t *dest,
1188cabdff1aSopenharmony_ci                                int dstW, int uvalpha, int y)
1189cabdff1aSopenharmony_ci{
1190cabdff1aSopenharmony_ci    const int16_t *ubuf0 = ubuf[0];
1191cabdff1aSopenharmony_ci    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1192cabdff1aSopenharmony_ci
1193cabdff1aSopenharmony_ci    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1194cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[0];
1195cabdff1aSopenharmony_ci        __asm__ volatile(
            /* store the b register at ESP_OFFSET(%5), then reuse it for dest */
1196cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1197cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
1198cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1199cabdff1aSopenharmony_ci            YSCALEYUV2RGB1(%%FF_REGBP, %5)
            /* mm7 is cleared before the writer macro runs */
1200cabdff1aSopenharmony_ci            "pxor    %%mm7, %%mm7                   \n\t"
1201cabdff1aSopenharmony_ci            WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1202cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1203cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1204cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1205cabdff1aSopenharmony_ci               "a" (&c->redDither)
1206cabdff1aSopenharmony_ci               NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1207cabdff1aSopenharmony_ci        );
1208cabdff1aSopenharmony_ci    } else {
1209cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[1];
1210cabdff1aSopenharmony_ci        __asm__ volatile(
            /* same sequence as above, with both chroma rows combined */
1211cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1212cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
1213cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1214cabdff1aSopenharmony_ci            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1215cabdff1aSopenharmony_ci            "pxor    %%mm7, %%mm7                   \n\t"
1216cabdff1aSopenharmony_ci            WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1217cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1218cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1219cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1220cabdff1aSopenharmony_ci               "a" (&c->redDither)
1221cabdff1aSopenharmony_ci               NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1222cabdff1aSopenharmony_ci        );
1223cabdff1aSopenharmony_ci    }
1224cabdff1aSopenharmony_ci}
1225cabdff1aSopenharmony_ci
/**
 * Single-luma-row output wrapper: converts buf0 plus chroma to B, G, R bytes
 * with YSCALEYUV2RGB1 / YSCALEYUV2RGB1b and writes them with WRITERGB15
 * (defined outside this part of the file).
 *
 * Chroma handling depends on uvalpha:
 *   - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1)
 *   - otherwise:      ubuf[0] and ubuf[1] are summed and shifted right by 5
 *                     (YSCALEYUV2RGB1b)
 *
 * When DITHER1XBPP is defined, the values at BLUE_DITHER, GREEN_DITHER and
 * RED_DITHER relative to %5 are added to the B, G and R bytes with unsigned
 * saturation before the write.
 *
 * @param c       context; &c->redDither is passed in the a register (%5) and
 *                is the base for every *_OFFSET / *_COEFF / *_DITHER access
 * @param buf0    luma row, operand %0 (buf1 aliases it and fills operand %1)
 * @param ubuf    chroma rows; ubuf[0] is operand %2, operand %3 is ubuf[0] or
 *                ubuf[1] depending on uvalpha
 * @param vbuf    not referenced; the second chroma plane is read from the
 *                ubuf rows at a displacement of UV_OFF_BYTE(%5)
 * @param abuf0   not referenced in this function body
 * @param dest    output pointer; memory operand %4, loaded into the b register
 * @param dstW    not referenced directly; WRITERGB15 is given DSTW_OFFSET(%5)
 *                instead (presumably the width kept in the context - confirm)
 * @param uvalpha selects the chroma path as described above
 * @param y       not referenced in this function body
 */
1226cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
1227cabdff1aSopenharmony_ci                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
1228cabdff1aSopenharmony_ci                                 const int16_t *abuf0, uint8_t *dest,
1229cabdff1aSopenharmony_ci                                 int dstW, int uvalpha, int y)
1230cabdff1aSopenharmony_ci{
1231cabdff1aSopenharmony_ci    const int16_t *ubuf0 = ubuf[0];
1232cabdff1aSopenharmony_ci    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1233cabdff1aSopenharmony_ci
1234cabdff1aSopenharmony_ci    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1235cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[0];
1236cabdff1aSopenharmony_ci        __asm__ volatile(
            /* store the b register at ESP_OFFSET(%5), then reuse it for dest */
1237cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1238cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
1239cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1240cabdff1aSopenharmony_ci            YSCALEYUV2RGB1(%%FF_REGBP, %5)
1241cabdff1aSopenharmony_ci            "pxor    %%mm7, %%mm7                   \n\t"
1242cabdff1aSopenharmony_ci            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1243cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
1244cabdff1aSopenharmony_ci            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
1245cabdff1aSopenharmony_ci            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1246cabdff1aSopenharmony_ci            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
1247cabdff1aSopenharmony_ci#endif
1248cabdff1aSopenharmony_ci            WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1249cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1250cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1251cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1252cabdff1aSopenharmony_ci               "a" (&c->redDither)
1253cabdff1aSopenharmony_ci               NAMED_CONSTRAINTS_ADD(bF8)
1254cabdff1aSopenharmony_ci        );
1255cabdff1aSopenharmony_ci    } else {
1256cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[1];
1257cabdff1aSopenharmony_ci        __asm__ volatile(
            /* same sequence as above, with both chroma rows combined */
1258cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1259cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
1260cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1261cabdff1aSopenharmony_ci            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1262cabdff1aSopenharmony_ci            "pxor    %%mm7, %%mm7                   \n\t"
1263cabdff1aSopenharmony_ci            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1264cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
1265cabdff1aSopenharmony_ci            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
1266cabdff1aSopenharmony_ci            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1267cabdff1aSopenharmony_ci            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
1268cabdff1aSopenharmony_ci#endif
1269cabdff1aSopenharmony_ci            WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1270cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1271cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1272cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1273cabdff1aSopenharmony_ci               "a" (&c->redDither)
1274cabdff1aSopenharmony_ci               NAMED_CONSTRAINTS_ADD(bF8)
1275cabdff1aSopenharmony_ci        );
1276cabdff1aSopenharmony_ci    }
1277cabdff1aSopenharmony_ci}
1278cabdff1aSopenharmony_ci
/**
 * Single-luma-row output wrapper: converts buf0 plus chroma to B, G, R bytes
 * with YSCALEYUV2RGB1 / YSCALEYUV2RGB1b and writes them with WRITERGB16
 * (defined outside this part of the file).
 *
 * Chroma handling depends on uvalpha:
 *   - uvalpha < 2048: only ubuf[0] is read (YSCALEYUV2RGB1)
 *   - otherwise:      ubuf[0] and ubuf[1] are summed and shifted right by 5
 *                     (YSCALEYUV2RGB1b)
 *
 * When DITHER1XBPP is defined, the values at BLUE_DITHER, GREEN_DITHER and
 * RED_DITHER relative to %5 are added to the B, G and R bytes with unsigned
 * saturation before the write.
 *
 * @param c       context; &c->redDither is passed in the a register (%5) and
 *                is the base for every *_OFFSET / *_COEFF / *_DITHER access
 * @param buf0    luma row, operand %0 (buf1 aliases it and fills operand %1)
 * @param ubuf    chroma rows; ubuf[0] is operand %2, operand %3 is ubuf[0] or
 *                ubuf[1] depending on uvalpha
 * @param vbuf    not referenced; the second chroma plane is read from the
 *                ubuf rows at a displacement of UV_OFF_BYTE(%5)
 * @param abuf0   not referenced in this function body
 * @param dest    output pointer; memory operand %4, loaded into the b register
 * @param dstW    not referenced directly; WRITERGB16 is given DSTW_OFFSET(%5)
 *                instead (presumably the width kept in the context - confirm)
 * @param uvalpha selects the chroma path as described above
 * @param y       not referenced in this function body
 */
1279cabdff1aSopenharmony_cistatic void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
1280cabdff1aSopenharmony_ci                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
1281cabdff1aSopenharmony_ci                                 const int16_t *abuf0, uint8_t *dest,
1282cabdff1aSopenharmony_ci                                 int dstW, int uvalpha, int y)
1283cabdff1aSopenharmony_ci{
1284cabdff1aSopenharmony_ci    const int16_t *ubuf0 = ubuf[0];
1285cabdff1aSopenharmony_ci    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1286cabdff1aSopenharmony_ci
1287cabdff1aSopenharmony_ci    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1288cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[0];
1289cabdff1aSopenharmony_ci        __asm__ volatile(
            /* store the b register at ESP_OFFSET(%5), then reuse it for dest */
1290cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1291cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
1292cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1293cabdff1aSopenharmony_ci            YSCALEYUV2RGB1(%%FF_REGBP, %5)
1294cabdff1aSopenharmony_ci            "pxor    %%mm7, %%mm7                   \n\t"
1295cabdff1aSopenharmony_ci            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1296cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
1297cabdff1aSopenharmony_ci            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
1298cabdff1aSopenharmony_ci            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1299cabdff1aSopenharmony_ci            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
1300cabdff1aSopenharmony_ci#endif
1301cabdff1aSopenharmony_ci            WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1302cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1303cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1304cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1305cabdff1aSopenharmony_ci               "a" (&c->redDither)
1306cabdff1aSopenharmony_ci               NAMED_CONSTRAINTS_ADD(bF8,bFC)
1307cabdff1aSopenharmony_ci        );
1308cabdff1aSopenharmony_ci    } else {
1309cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[1];
1310cabdff1aSopenharmony_ci        __asm__ volatile(
            /* same sequence as above, with both chroma rows combined */
1311cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1312cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
1313cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1314cabdff1aSopenharmony_ci            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1315cabdff1aSopenharmony_ci            "pxor    %%mm7, %%mm7                   \n\t"
1316cabdff1aSopenharmony_ci            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1317cabdff1aSopenharmony_ci#ifdef DITHER1XBPP
1318cabdff1aSopenharmony_ci            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
1319cabdff1aSopenharmony_ci            "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1320cabdff1aSopenharmony_ci            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
1321cabdff1aSopenharmony_ci#endif
1322cabdff1aSopenharmony_ci            WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1323cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1324cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1325cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1326cabdff1aSopenharmony_ci               "a" (&c->redDither)
1327cabdff1aSopenharmony_ci               NAMED_CONSTRAINTS_ADD(bF8,bFC)
1328cabdff1aSopenharmony_ci        );
1329cabdff1aSopenharmony_ci    }
1330cabdff1aSopenharmony_ci}
1331cabdff1aSopenharmony_ci
/*
 * Loop head for the packed-output path that reads chroma from one source.
 *
 * The macro expands to asm text that zeroes the register named by `index`,
 * aligns the next instruction with `.p2align 4` and defines local label `1:`.
 * It contains no backward jump itself, so whatever follows it inside the same
 * asm statement has to close the loop.
 *
 * Work done after the label:
 *   mm3 <- 8 bytes at (%2 + index)
 *   mm4 <- 8 bytes at (%2 + index + off), where `off` is the value stored in
 *          memory at UV_OFF_BYTE(c); `index` is restored by the matching sub
 *   mm1 <- 8 bytes at (%0 + 2*index)
 *   mm7 <- 8 bytes at (%0 + 2*index + 8)
 * followed by an arithmetic right shift by 7 of every 16-bit word in
 * mm3, mm4, mm1 and mm7.
 *
 * %0 and %2 are positional operands of the enclosing asm statement
 * (buf0 and ubuf0 in yuv2yuyv422_1 further down in this file).
 *
 * NOTE(review): UV_OFF_BYTE is defined outside this chunk; the existing
 * "[eax+2048]" remarks suggest it holds the distance to the second chroma
 * plane -- confirm against its definition.
 */
1332cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2PACKED1(index, c) \
1333cabdff1aSopenharmony_ci    "xor            "#index", "#index"  \n\t"\
1334cabdff1aSopenharmony_ci    ".p2align              4            \n\t"\
1335cabdff1aSopenharmony_ci    "1:                                 \n\t"\
1336cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
1337cabdff1aSopenharmony_ci    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1338cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
1339cabdff1aSopenharmony_ci    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1340cabdff1aSopenharmony_ci    "psraw                $7, %%mm3     \n\t" \
1341cabdff1aSopenharmony_ci    "psraw                $7, %%mm4     \n\t" \
1342cabdff1aSopenharmony_ci    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
1343cabdff1aSopenharmony_ci    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
1344cabdff1aSopenharmony_ci    "psraw                $7, %%mm1     \n\t" \
1345cabdff1aSopenharmony_ci    "psraw                $7, %%mm7     \n\t" \
1346cabdff1aSopenharmony_ci
/* Extra macro level so that macro arguments are expanded before "#" stringizes them. */
1347cabdff1aSopenharmony_ci#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
1348cabdff1aSopenharmony_ci
/*
 * Loop head for the packed-output path that combines two chroma sources.
 *
 * Like the single-source variant, the expansion zeroes the register named by
 * `index`, aligns with `.p2align 4` and defines local label `1:` without
 * emitting the closing jump.
 *
 * Work done after the label:
 *   mm2 <- 8 bytes at (%2 + index),        mm3 <- 8 bytes at (%3 + index)
 *   mm5 <- 8 bytes at (%2 + index + off),  mm4 <- 8 bytes at (%3 + index + off)
 *          where `off` is the value stored at UV_OFF_BYTE(c); `index` is
 *          restored by the matching sub
 *   mm3 <- mm3 + mm2, mm4 <- mm4 + mm5     (16-bit word adds)
 *   mm3, mm4 shifted right by 8 with psrlw (logical shift), i.e. one bit more
 *          than the >>7 of the single-source variant because two inputs were
 *          summed
 *   mm1 <- 8 bytes at (%0 + 2*index), mm7 <- 8 bytes at (%0 + 2*index + 8),
 *          both arithmetic-shifted right by 7 per 16-bit word
 *
 * %0, %2 and %3 are positional operands of the enclosing asm statement
 * (buf0, ubuf0 and ubuf1 in yuv2yuyv422_1 further down in this file).
 *
 * NOTE(review): the chroma shift here is logical (psrlw) while the
 * single-source variant uses an arithmetic shift (psraw) -- presumably the
 * inputs are non-negative; confirm.
 */
1349cabdff1aSopenharmony_ci#define REAL_YSCALEYUV2PACKED1b(index, c) \
1350cabdff1aSopenharmony_ci    "xor "#index", "#index"             \n\t"\
1351cabdff1aSopenharmony_ci    ".p2align              4            \n\t"\
1352cabdff1aSopenharmony_ci    "1:                                 \n\t"\
1353cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
1354cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
1355cabdff1aSopenharmony_ci    "add "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1356cabdff1aSopenharmony_ci    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
1357cabdff1aSopenharmony_ci    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
1358cabdff1aSopenharmony_ci    "sub "UV_OFF_BYTE"("#c"), "#index"  \n\t" \
1359cabdff1aSopenharmony_ci    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1360cabdff1aSopenharmony_ci    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1361cabdff1aSopenharmony_ci    "psrlw                $8, %%mm3     \n\t" \
1362cabdff1aSopenharmony_ci    "psrlw                $8, %%mm4     \n\t" \
1363cabdff1aSopenharmony_ci    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
1364cabdff1aSopenharmony_ci    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
1365cabdff1aSopenharmony_ci    "psraw                $7, %%mm1     \n\t" \
1366cabdff1aSopenharmony_ci    "psraw                $7, %%mm7     \n\t"
/* Extra macro level so that macro arguments are expanded before "#" stringizes them. */
1367cabdff1aSopenharmony_ci#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
1368cabdff1aSopenharmony_ci
/*
 * Packed YUYV422 output from a single luma line, implemented in inline asm.
 *
 * c        context; &c->redDither is handed to the asm as operand %5 and is
 *          the base address for the ESP_OFFSET, DSTW_OFFSET and UV_OFF_BYTE
 *          displacements used inside the asm text
 * buf0     luma input, asm operand %0
 * ubuf     chroma inputs; ubuf[0] is always operand %2, ubuf[1] is operand %3
 *          only when uvalpha >= 2048
 * vbuf     not referenced in this function body
 * abuf0    not referenced in this function body
 * dest     output pointer, asm operand %4 (loaded into the b register)
 * dstW     not referenced by name; the asm reads a width through
 *          DSTW_OFFSET(%5) -- presumably the same value stored in the
 *          context, confirm against the DSTW_OFFSET definition
 * uvalpha  selects the chroma path: below 2048 only ubuf[0] is read
 *          (YSCALEYUV2PACKED1), otherwise ubuf[0] and ubuf[1] are summed
 *          (YSCALEYUV2PACKED1b)
 * y        not referenced in this function body
 *
 * Both asm statements follow the same frame: spill the b register to
 * ESP_OFFSET(%5), load dest into it, push the BP register and use BP as the
 * loop index, then undo both in reverse order.
 *
 * NOTE(review): the asm statements list only input operands and no clobbers;
 * WRITEYUY2 is defined outside this chunk and presumably stores to dest and
 * closes the "1:" loop opened by the PACKED1 macros -- confirm.
 */
1369cabdff1aSopenharmony_cistatic void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
1370cabdff1aSopenharmony_ci                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
1371cabdff1aSopenharmony_ci                                  const int16_t *abuf0, uint8_t *dest,
1372cabdff1aSopenharmony_ci                                  int dstW, int uvalpha, int y)
1373cabdff1aSopenharmony_ci{
1374cabdff1aSopenharmony_ci    const int16_t *ubuf0 = ubuf[0];
1375cabdff1aSopenharmony_ci    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1376cabdff1aSopenharmony_ci
1377cabdff1aSopenharmony_ci    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        /* Both chroma operands alias ubuf[0]; the PACKED1 macro only reads %2. */
1378cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[0];
1379cabdff1aSopenharmony_ci        __asm__ volatile(
            /* save the b register in the context, then reuse it as the dest pointer */
1380cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1381cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
            /* BP becomes the loop index, so keep its value on the stack */
1382cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1383cabdff1aSopenharmony_ci            YSCALEYUV2PACKED1(%%FF_REGBP, %5)
1384cabdff1aSopenharmony_ci            WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
            /* restore BP and the b register in reverse order */
1385cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1386cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1387cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1388cabdff1aSopenharmony_ci               "a" (&c->redDither)
1389cabdff1aSopenharmony_ci        );
1390cabdff1aSopenharmony_ci    } else {
        /* Second chroma source is ubuf[1]; the PACKED1b macro sums %2 and %3. */
1391cabdff1aSopenharmony_ci        const int16_t *ubuf1 = ubuf[1];
1392cabdff1aSopenharmony_ci        __asm__ volatile(
            /* same register save / restore frame as the branch above */
1393cabdff1aSopenharmony_ci            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
1394cabdff1aSopenharmony_ci            "mov           %4, %%"FF_REG_b"         \n\t"
1395cabdff1aSopenharmony_ci            "push %%"FF_REG_BP"                     \n\t"
1396cabdff1aSopenharmony_ci            YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
1397cabdff1aSopenharmony_ci            WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1398cabdff1aSopenharmony_ci            "pop %%"FF_REG_BP"                      \n\t"
1399cabdff1aSopenharmony_ci            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
1400cabdff1aSopenharmony_ci            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1401cabdff1aSopenharmony_ci               "a" (&c->redDither)
1402cabdff1aSopenharmony_ci        );
1403cabdff1aSopenharmony_ci    }
1404cabdff1aSopenharmony_ci}
/*
 * Install the template's output and fast horizontal scaling routines in the
 * context.
 *
 * c  context to update; reads dstFormat, flags, srcBpc, dstBpc and
 *    canMMXEXTBeUsed, writes use_mmx_vfilter, yuv2packedX, yuv2packed1,
 *    yuv2packed2, hyscale_fast and hcscale_fast
 *
 * Step 1: use_mmx_vfilter is cleared. The packed-output pointers are only
 * touched when the destination is not a 16 bit-per-sample, N bit-per-sample or
 * semi-planar YUV format, is not GRAYF32BE/LE, and SWS_BITEXACT is not set.
 *   - With SWS_ACCURATE_RND the "_ar" yuv2packedX variants are selected and
 *     use_mmx_vfilter stays 0.
 *   - Without it use_mmx_vfilter becomes 1 and the plain yuv2packedX variants
 *     are selected.
 *   - yuv2packed1 / yuv2packed2 are selected independently of
 *     SWS_ACCURATE_RND.
 * All three pointer selections are skipped when SWS_FULL_CHR_H_INT is set,
 * and formats without a case label leave the pointers as they were.
 *
 * Step 2: for 8-bit sources with a destination depth of at most 14 bits the
 * fast bilinear horizontal scalers are either set to the MMXEXT versions or
 * cleared to NULL.
 *
 * NOTE(review): BGR32 has a case only in the non-accurate yuv2packedX switch,
 * and the BGR24 case is guarded by HAVE_6REGS in the yuv2packedX switches but
 * not in the yuv2packed1/2 switch -- presumably intentional; confirm against
 * which functions the rest of the file defines.
 */
1405cabdff1aSopenharmony_cistatic av_cold void RENAME(sws_init_swscale)(SwsContext *c)
1406cabdff1aSopenharmony_ci{
1407cabdff1aSopenharmony_ci    enum AVPixelFormat dstFormat = c->dstFormat;
1408cabdff1aSopenharmony_ci
1409cabdff1aSopenharmony_ci    c->use_mmx_vfilter= 0;
1410cabdff1aSopenharmony_ci    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat)
1411cabdff1aSopenharmony_ci        && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
1412cabdff1aSopenharmony_ci        && !(c->flags & SWS_BITEXACT)) {
            /* multi-tap vertical output (yuv2packedX): accurate-rounding vs. default variants */
1413cabdff1aSopenharmony_ci            if (c->flags & SWS_ACCURATE_RND) {
1414cabdff1aSopenharmony_ci                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1415cabdff1aSopenharmony_ci                    switch (c->dstFormat) {
1416cabdff1aSopenharmony_ci                    case AV_PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X_ar);   break;
1417cabdff1aSopenharmony_ci#if HAVE_6REGS
1418cabdff1aSopenharmony_ci                    case AV_PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X_ar);   break;
1419cabdff1aSopenharmony_ci#endif
1420cabdff1aSopenharmony_ci                    case AV_PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X_ar);  break;
1421cabdff1aSopenharmony_ci                    case AV_PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X_ar);  break;
1422cabdff1aSopenharmony_ci                    case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
1423cabdff1aSopenharmony_ci                    default: break;
1424cabdff1aSopenharmony_ci                    }
1425cabdff1aSopenharmony_ci                }
1426cabdff1aSopenharmony_ci            } else {
                /* the only place where use_mmx_vfilter is switched on */
1427cabdff1aSopenharmony_ci                c->use_mmx_vfilter= 1;
1428cabdff1aSopenharmony_ci                if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1429cabdff1aSopenharmony_ci                    switch (c->dstFormat) {
1430cabdff1aSopenharmony_ci                    case AV_PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
1431cabdff1aSopenharmony_ci                    case AV_PIX_FMT_BGR32:   c->yuv2packedX = RENAME(yuv2bgr32_X);   break;
1432cabdff1aSopenharmony_ci#if HAVE_6REGS
1433cabdff1aSopenharmony_ci                    case AV_PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
1434cabdff1aSopenharmony_ci#endif
1435cabdff1aSopenharmony_ci                    case AV_PIX_FMT_RGB555:  c->yuv2packedX = RENAME(yuv2rgb555_X);  break;
1436cabdff1aSopenharmony_ci                    case AV_PIX_FMT_RGB565:  c->yuv2packedX = RENAME(yuv2rgb565_X);  break;
1437cabdff1aSopenharmony_ci                    case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
1438cabdff1aSopenharmony_ci                    default: break;
1439cabdff1aSopenharmony_ci                    }
1440cabdff1aSopenharmony_ci                }
1441cabdff1aSopenharmony_ci            }
        /* one- and two-line output (yuv2packed1 / yuv2packed2), same for both rounding modes */
1442cabdff1aSopenharmony_ci        if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1443cabdff1aSopenharmony_ci            switch (c->dstFormat) {
1444cabdff1aSopenharmony_ci            case AV_PIX_FMT_RGB32:
1445cabdff1aSopenharmony_ci                c->yuv2packed1 = RENAME(yuv2rgb32_1);
1446cabdff1aSopenharmony_ci                c->yuv2packed2 = RENAME(yuv2rgb32_2);
1447cabdff1aSopenharmony_ci                break;
1448cabdff1aSopenharmony_ci            case AV_PIX_FMT_BGR24:
1449cabdff1aSopenharmony_ci                c->yuv2packed1 = RENAME(yuv2bgr24_1);
1450cabdff1aSopenharmony_ci                c->yuv2packed2 = RENAME(yuv2bgr24_2);
1451cabdff1aSopenharmony_ci                break;
1452cabdff1aSopenharmony_ci            case AV_PIX_FMT_RGB555:
1453cabdff1aSopenharmony_ci                c->yuv2packed1 = RENAME(yuv2rgb555_1);
1454cabdff1aSopenharmony_ci                c->yuv2packed2 = RENAME(yuv2rgb555_2);
1455cabdff1aSopenharmony_ci                break;
1456cabdff1aSopenharmony_ci            case AV_PIX_FMT_RGB565:
1457cabdff1aSopenharmony_ci                c->yuv2packed1 = RENAME(yuv2rgb565_1);
1458cabdff1aSopenharmony_ci                c->yuv2packed2 = RENAME(yuv2rgb565_2);
1459cabdff1aSopenharmony_ci                break;
1460cabdff1aSopenharmony_ci            case AV_PIX_FMT_YUYV422:
1461cabdff1aSopenharmony_ci                c->yuv2packed1 = RENAME(yuv2yuyv422_1);
1462cabdff1aSopenharmony_ci                c->yuv2packed2 = RENAME(yuv2yuyv422_2);
1463cabdff1aSopenharmony_ci                break;
1464cabdff1aSopenharmony_ci            default:
1465cabdff1aSopenharmony_ci                break;
1466cabdff1aSopenharmony_ci            }
1467cabdff1aSopenharmony_ci        }
1468cabdff1aSopenharmony_ci    }
1469cabdff1aSopenharmony_ci
1470cabdff1aSopenharmony_ci    if (c->srcBpc == 8 && c->dstBpc <= 14) {
1471cabdff1aSopenharmony_ci        // Use the MMXEXT fast bilinear scalers when SWS_FAST_BILINEAR is set and canMMXEXTBeUsed is nonzero; otherwise clear both fast-path pointers.
1472cabdff1aSopenharmony_ci        if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
1473cabdff1aSopenharmony_ci            c->hyscale_fast = ff_hyscale_fast_mmxext;
1474cabdff1aSopenharmony_ci            c->hcscale_fast = ff_hcscale_fast_mmxext;
1475cabdff1aSopenharmony_ci        } else {
            /* NOTE(review): NULL presumably makes the caller use its regular horizontal scaler -- confirm at the call site */
1476cabdff1aSopenharmony_ci            c->hyscale_fast = NULL;
1477cabdff1aSopenharmony_ci            c->hcscale_fast = NULL;
1478cabdff1aSopenharmony_ci        }
1479cabdff1aSopenharmony_ci    }
1480cabdff1aSopenharmony_ci}
1481