1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * AltiVec-enhanced yuv-to-yuv conversion routines.
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5cabdff1aSopenharmony_ci * based on the equivalent C code in swscale.c
6cabdff1aSopenharmony_ci *
7cabdff1aSopenharmony_ci * This file is part of FFmpeg.
8cabdff1aSopenharmony_ci *
9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
13cabdff1aSopenharmony_ci *
14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17cabdff1aSopenharmony_ci * Lesser General Public License for more details.
18cabdff1aSopenharmony_ci *
19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22cabdff1aSopenharmony_ci */
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci#include <inttypes.h>
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci#include "config.h"
27cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
28cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
29cabdff1aSopenharmony_ci#include "libswscale/swscale.h"
30cabdff1aSopenharmony_ci#include "libswscale/swscale_internal.h"
31cabdff1aSopenharmony_ci
32cabdff1aSopenharmony_ci#if HAVE_ALTIVEC
33cabdff1aSopenharmony_ci
34cabdff1aSopenharmony_cistatic int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[],
35cabdff1aSopenharmony_ci                                       int srcStride[], int srcSliceY,
36cabdff1aSopenharmony_ci                                       int srcSliceH, uint8_t *dstParam[],
37cabdff1aSopenharmony_ci                                       int dstStride_a[])
38cabdff1aSopenharmony_ci{
39cabdff1aSopenharmony_ci    uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
40cabdff1aSopenharmony_ci    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
41cabdff1aSopenharmony_ci    //            srcStride[0], srcStride[1], dstStride[0]);
42cabdff1aSopenharmony_ci    const uint8_t *ysrc   = src[0];
43cabdff1aSopenharmony_ci    const uint8_t *usrc   = src[1];
44cabdff1aSopenharmony_ci    const uint8_t *vsrc   = src[2];
45cabdff1aSopenharmony_ci    const int width       = c->srcW;
46cabdff1aSopenharmony_ci    const int height      = srcSliceH;
47cabdff1aSopenharmony_ci    const int lumStride   = srcStride[0];
48cabdff1aSopenharmony_ci    const int chromStride = srcStride[1];
49cabdff1aSopenharmony_ci    const int dstStride   = dstStride_a[0];
50cabdff1aSopenharmony_ci    const vector unsigned char yperm = vec_lvsl(0, ysrc);
51cabdff1aSopenharmony_ci    const int vertLumPerChroma       = 2;
52cabdff1aSopenharmony_ci    register unsigned int y;
53cabdff1aSopenharmony_ci
54cabdff1aSopenharmony_ci    /* This code assumes:
55cabdff1aSopenharmony_ci     *
56cabdff1aSopenharmony_ci     * 1) dst is 16 bytes-aligned
57cabdff1aSopenharmony_ci     * 2) dstStride is a multiple of 16
58cabdff1aSopenharmony_ci     * 3) width is a multiple of 16
59cabdff1aSopenharmony_ci     * 4) lum & chrom stride are multiples of 8
60cabdff1aSopenharmony_ci     */
61cabdff1aSopenharmony_ci
62cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
63cabdff1aSopenharmony_ci        int i;
64cabdff1aSopenharmony_ci        for (i = 0; i < width - 31; i += 32) {
65cabdff1aSopenharmony_ci            const unsigned int j          = i >> 1;
66cabdff1aSopenharmony_ci            vector unsigned char v_yA     = vec_ld(i, ysrc);
67cabdff1aSopenharmony_ci            vector unsigned char v_yB     = vec_ld(i + 16, ysrc);
68cabdff1aSopenharmony_ci            vector unsigned char v_yC     = vec_ld(i + 32, ysrc);
69cabdff1aSopenharmony_ci            vector unsigned char v_y1     = vec_perm(v_yA, v_yB, yperm);
70cabdff1aSopenharmony_ci            vector unsigned char v_y2     = vec_perm(v_yB, v_yC, yperm);
71cabdff1aSopenharmony_ci            vector unsigned char v_uA     = vec_ld(j, usrc);
72cabdff1aSopenharmony_ci            vector unsigned char v_uB     = vec_ld(j + 16, usrc);
73cabdff1aSopenharmony_ci            vector unsigned char v_u      = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
74cabdff1aSopenharmony_ci            vector unsigned char v_vA     = vec_ld(j, vsrc);
75cabdff1aSopenharmony_ci            vector unsigned char v_vB     = vec_ld(j + 16, vsrc);
76cabdff1aSopenharmony_ci            vector unsigned char v_v      = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
77cabdff1aSopenharmony_ci            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
78cabdff1aSopenharmony_ci            vector unsigned char v_uv_b   = vec_mergel(v_u, v_v);
79cabdff1aSopenharmony_ci            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
80cabdff1aSopenharmony_ci            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
81cabdff1aSopenharmony_ci            vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
82cabdff1aSopenharmony_ci            vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
83cabdff1aSopenharmony_ci            vec_st(v_yuy2_0, (i << 1), dst);
84cabdff1aSopenharmony_ci            vec_st(v_yuy2_1, (i << 1) + 16, dst);
85cabdff1aSopenharmony_ci            vec_st(v_yuy2_2, (i << 1) + 32, dst);
86cabdff1aSopenharmony_ci            vec_st(v_yuy2_3, (i << 1) + 48, dst);
87cabdff1aSopenharmony_ci        }
88cabdff1aSopenharmony_ci        if (i < width) {
89cabdff1aSopenharmony_ci            const unsigned int j          = i >> 1;
90cabdff1aSopenharmony_ci            vector unsigned char v_y1     = vec_ld(i, ysrc);
91cabdff1aSopenharmony_ci            vector unsigned char v_u      = vec_ld(j, usrc);
92cabdff1aSopenharmony_ci            vector unsigned char v_v      = vec_ld(j, vsrc);
93cabdff1aSopenharmony_ci            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
94cabdff1aSopenharmony_ci            vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
95cabdff1aSopenharmony_ci            vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
96cabdff1aSopenharmony_ci            vec_st(v_yuy2_0, (i << 1), dst);
97cabdff1aSopenharmony_ci            vec_st(v_yuy2_1, (i << 1) + 16, dst);
98cabdff1aSopenharmony_ci        }
99cabdff1aSopenharmony_ci        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
100cabdff1aSopenharmony_ci            usrc += chromStride;
101cabdff1aSopenharmony_ci            vsrc += chromStride;
102cabdff1aSopenharmony_ci        }
103cabdff1aSopenharmony_ci        ysrc += lumStride;
104cabdff1aSopenharmony_ci        dst  += dstStride;
105cabdff1aSopenharmony_ci    }
106cabdff1aSopenharmony_ci
107cabdff1aSopenharmony_ci    return srcSliceH;
108cabdff1aSopenharmony_ci}
109cabdff1aSopenharmony_ci
110cabdff1aSopenharmony_cistatic int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[],
111cabdff1aSopenharmony_ci                                       int srcStride[], int srcSliceY,
112cabdff1aSopenharmony_ci                                       int srcSliceH, uint8_t *dstParam[],
113cabdff1aSopenharmony_ci                                       int dstStride_a[])
114cabdff1aSopenharmony_ci{
115cabdff1aSopenharmony_ci    uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
116cabdff1aSopenharmony_ci    // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
117cabdff1aSopenharmony_ci    //            srcStride[0], srcStride[1], dstStride[0]);
118cabdff1aSopenharmony_ci    const uint8_t *ysrc              = src[0];
119cabdff1aSopenharmony_ci    const uint8_t *usrc              = src[1];
120cabdff1aSopenharmony_ci    const uint8_t *vsrc              = src[2];
121cabdff1aSopenharmony_ci    const int width                  = c->srcW;
122cabdff1aSopenharmony_ci    const int height                 = srcSliceH;
123cabdff1aSopenharmony_ci    const int lumStride              = srcStride[0];
124cabdff1aSopenharmony_ci    const int chromStride            = srcStride[1];
125cabdff1aSopenharmony_ci    const int dstStride              = dstStride_a[0];
126cabdff1aSopenharmony_ci    const int vertLumPerChroma       = 2;
127cabdff1aSopenharmony_ci    const vector unsigned char yperm = vec_lvsl(0, ysrc);
128cabdff1aSopenharmony_ci    register unsigned int y;
129cabdff1aSopenharmony_ci
130cabdff1aSopenharmony_ci    /* This code assumes:
131cabdff1aSopenharmony_ci     *
132cabdff1aSopenharmony_ci     * 1) dst is 16 bytes-aligned
133cabdff1aSopenharmony_ci     * 2) dstStride is a multiple of 16
134cabdff1aSopenharmony_ci     * 3) width is a multiple of 16
135cabdff1aSopenharmony_ci     * 4) lum & chrom stride are multiples of 8
136cabdff1aSopenharmony_ci     */
137cabdff1aSopenharmony_ci
138cabdff1aSopenharmony_ci    for (y = 0; y < height; y++) {
139cabdff1aSopenharmony_ci        int i;
140cabdff1aSopenharmony_ci        for (i = 0; i < width - 31; i += 32) {
141cabdff1aSopenharmony_ci            const unsigned int j          = i >> 1;
142cabdff1aSopenharmony_ci            vector unsigned char v_yA     = vec_ld(i, ysrc);
143cabdff1aSopenharmony_ci            vector unsigned char v_yB     = vec_ld(i + 16, ysrc);
144cabdff1aSopenharmony_ci            vector unsigned char v_yC     = vec_ld(i + 32, ysrc);
145cabdff1aSopenharmony_ci            vector unsigned char v_y1     = vec_perm(v_yA, v_yB, yperm);
146cabdff1aSopenharmony_ci            vector unsigned char v_y2     = vec_perm(v_yB, v_yC, yperm);
147cabdff1aSopenharmony_ci            vector unsigned char v_uA     = vec_ld(j, usrc);
148cabdff1aSopenharmony_ci            vector unsigned char v_uB     = vec_ld(j + 16, usrc);
149cabdff1aSopenharmony_ci            vector unsigned char v_u      = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
150cabdff1aSopenharmony_ci            vector unsigned char v_vA     = vec_ld(j, vsrc);
151cabdff1aSopenharmony_ci            vector unsigned char v_vB     = vec_ld(j + 16, vsrc);
152cabdff1aSopenharmony_ci            vector unsigned char v_v      = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
153cabdff1aSopenharmony_ci            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
154cabdff1aSopenharmony_ci            vector unsigned char v_uv_b   = vec_mergel(v_u, v_v);
155cabdff1aSopenharmony_ci            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
156cabdff1aSopenharmony_ci            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
157cabdff1aSopenharmony_ci            vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
158cabdff1aSopenharmony_ci            vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
159cabdff1aSopenharmony_ci            vec_st(v_uyvy_0, (i << 1), dst);
160cabdff1aSopenharmony_ci            vec_st(v_uyvy_1, (i << 1) + 16, dst);
161cabdff1aSopenharmony_ci            vec_st(v_uyvy_2, (i << 1) + 32, dst);
162cabdff1aSopenharmony_ci            vec_st(v_uyvy_3, (i << 1) + 48, dst);
163cabdff1aSopenharmony_ci        }
164cabdff1aSopenharmony_ci        if (i < width) {
165cabdff1aSopenharmony_ci            const unsigned int j          = i >> 1;
166cabdff1aSopenharmony_ci            vector unsigned char v_y1     = vec_ld(i, ysrc);
167cabdff1aSopenharmony_ci            vector unsigned char v_u      = vec_ld(j, usrc);
168cabdff1aSopenharmony_ci            vector unsigned char v_v      = vec_ld(j, vsrc);
169cabdff1aSopenharmony_ci            vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
170cabdff1aSopenharmony_ci            vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
171cabdff1aSopenharmony_ci            vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
172cabdff1aSopenharmony_ci            vec_st(v_uyvy_0, (i << 1), dst);
173cabdff1aSopenharmony_ci            vec_st(v_uyvy_1, (i << 1) + 16, dst);
174cabdff1aSopenharmony_ci        }
175cabdff1aSopenharmony_ci        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
176cabdff1aSopenharmony_ci            usrc += chromStride;
177cabdff1aSopenharmony_ci            vsrc += chromStride;
178cabdff1aSopenharmony_ci        }
179cabdff1aSopenharmony_ci        ysrc += lumStride;
180cabdff1aSopenharmony_ci        dst  += dstStride;
181cabdff1aSopenharmony_ci    }
182cabdff1aSopenharmony_ci    return srcSliceH;
183cabdff1aSopenharmony_ci}
184cabdff1aSopenharmony_ci
185cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */
186cabdff1aSopenharmony_ci
187cabdff1aSopenharmony_ciav_cold void ff_get_unscaled_swscale_ppc(SwsContext *c)
188cabdff1aSopenharmony_ci{
189cabdff1aSopenharmony_ci#if HAVE_ALTIVEC
190cabdff1aSopenharmony_ci    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
191cabdff1aSopenharmony_ci        return;
192cabdff1aSopenharmony_ci
193cabdff1aSopenharmony_ci    if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) &&
194cabdff1aSopenharmony_ci        c->srcFormat == AV_PIX_FMT_YUV420P) {
195cabdff1aSopenharmony_ci        enum AVPixelFormat dstFormat = c->dstFormat;
196cabdff1aSopenharmony_ci
197cabdff1aSopenharmony_ci        // unscaled YV12 -> packed YUV, we want speed
198cabdff1aSopenharmony_ci        if (dstFormat == AV_PIX_FMT_YUYV422)
199cabdff1aSopenharmony_ci            c->convert_unscaled = yv12toyuy2_unscaled_altivec;
200cabdff1aSopenharmony_ci        else if (dstFormat == AV_PIX_FMT_UYVY422)
201cabdff1aSopenharmony_ci            c->convert_unscaled = yv12touyvy_unscaled_altivec;
202cabdff1aSopenharmony_ci    }
203cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */
204cabdff1aSopenharmony_ci}
205